@peralta
Created May 30, 2012 09:43
Dirty and filthy script to migrate Trac tickets to a static site, based on their RSS feed.
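The heart of the script is plain regex substitution: Trac ticket URLs in each comment are rewritten to point at the generated local pages. A minimal, self-contained sketch of just that rewriting step (the hostname and `t<id>.html` naming follow the hardcoded values in the script below):

```python
import re

# Ticket URLs become links to the generated static pages:
# .../ticket/123 -> t123.html (matching the t<id>.xml naming from the curl loop)
ticket_re = re.compile(r'(https://trac\.tuenti\.com/projects/tuenti\.com/ticket/)(\d+)')

def link_ticket(match):
    return 't' + match.group(2) + '.html'

text = 'fixed in https://trac.tuenti.com/projects/tuenti.com/ticket/123, see comments'
print(ticket_re.sub(link_ticket, text))  # fixed in t123.html, see comments
```

The changeset links are handled the same way, just with a different pattern and a replacement that points at the fisheye install instead of a local file.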
#!/usr/bin/env python
"""
dirty and filthy script to migrate trac tickets to a static site, based on
their rss feed.
will take all xml files in data/ dir and create the new files with .html
extension. also accepts single xml file as input.
depends on feedparser (pip install feedparser)
to retrieve the tickets for a trac install, just do:
let i=1
while [ $i -lt 45000 ]
do
curl -s "https://trac.example.com/projects/exampleproject/ticket/$i?format=rss" > data/t$i.xml
i=$(($i + 1))
done
"""
import feedparser
import glob
import re
import sys
body_template = """<!DOCTYPE html>
<html lang="en">
<head>
<title>%(title)s</title>
<link href="http://twitter.github.com/bootstrap/assets/css/bootstrap.css" rel="stylesheet">
</head>
<body>
<div class="navbar navbar-fixed-top">
<div class="navbar-inner">
<div class="container">
<a class="brand" href="#">Tuenti.com ancient trac archive</a>
</div>
</div>
</div>
<div class="container">
<h1>%(title)s</h1>
<div class="hero-unit">
%(subtitle)s
</div>
<br />
%(entries_html)s
</div>
</body>
"""
entry_template = """
<hr />
<h3>%(published)s - %(author)s: %(title)s</h3>
%(description)s
"""
ticket_re = re.compile(r'(https://trac\.tuenti\.com/projects/tuenti\.com/ticket/)(\d+)')
changeset_re = re.compile(r'(https://trac\.tuenti\.com/projects/tuenti\.com/changeset/)([a-f0-9]+)([^"]+)')
def replace_links(text):
    """Rewrite trac ticket links to local pages and changeset links to fisheye."""
    def link_ticket(match):
        return 't' + match.group(2) + '.html'

    def link_changeset(match):
        return 'http://fisheye.tuenti.int/changelog/release?cs=' + match.group(2)

    t = ticket_re.sub(link_ticket, text)
    t = changeset_re.sub(link_changeset, t)
    return t
def generate_html(feed):
    """Render a parsed feed (one ticket plus its comments) as a standalone HTML page."""
    entries_html = ""
    for entry in feed.entries:
        entry['description'] = replace_links(entry['description'])
        entries_html += entry_template % entry
    feed.feed['entries_html'] = entries_html
    feed.feed['subtitle'] = replace_links(feed.feed['subtitle'])
    return body_template % feed.feed
if __name__ == '__main__':
    fnames = glob.glob("data/*.xml")
    if len(sys.argv) > 1:
        fnames = sys.argv[1:]
    for fname in fnames:
        try:
            with open(fname, "r") as fd:
                feed = feedparser.parse(fd.read())
            print("generating feed for", fname)
            html = generate_html(feed)
            out_name = re.sub(r'\.xml$', '.html', fname)
            # xmlcharrefreplace keeps characters outside latin-1 from crashing the write
            with open(out_name, "w", encoding="iso-8859-1",
                      errors="xmlcharrefreplace") as fd:
                fd.write(html)
        except Exception as exc:
            print("problem generating html for", fname, "-", exc)