Parses a MediaWiki export and generates an analysis of content for the RepRap Wiki.
#!/usr/bin/env groovy
import groovy.time.*
// Input: MediaWiki XML export of the wiki pages in category "Development"
// (latest revisions only, judging by the file name).
def f2 = new File('RepRapWiki-DevelopmentPages_LatestRevisions.xml')
// Parse straight from the file so the XML parser honours the encoding declared
// in the document instead of decoding it with the platform default charset.
def records = new XmlSlurper().parse(f2)
// The <page> elements directly under the export root; the main loop below reads
// page.title and page.revision from each of them.
// NOTE(review): the right-hand side was missing in the source; `records.page`
// is reconstructed from how `pages` is consumed — confirm against the export.
def pages = records.page

// Summary counters printed in the report header.
def multiDevelopmentCount = 0
def developmentStubCount = 0
def taggedAsDevelopmentCount = 0

// Every Development record, in the order the pages were read.
def developmentIndex = []
// Category name -> list of Development records, kept sorted by category name.
def categoryIndex = new TreeMap([:])
// Age bucket -> list of Development records. The keys double as the anchor ids
// targeted by the "last updated" links in the generated tables.
def lastUpdatedIndex = [
        'LessThan1DayAgo'   : [],
        'LessThan1WeekAgo'  : [],
        'LessThan1MonthAgo' : [],
        'LessThan6MonthsAgo': [],
        'LessThan1YearAgo'  : [],
        'LessThan3YearsAgo' : [],
        'MoreThan3YearsAgo' : []
]
/**
 * One wiki page that carries the Development template, together with the
 * values extracted from it, renderable as an HTML table row of wiki markup.
 */
class Development {
    /** Wiki page title. */
    def title
    /** Date of the page's latest revision. */
    def lastUpdated
    /** Value of the template's "reprap" parameter, or "undefined". */
    def basedOn
    /** Names of the categories listed in the template. */
    def categories
    /** Value of the template's "status" parameter, or "undefined". */
    def status
    /** True when the page text contains {{Development:Stub}}. */
    def isDevelopmentStub
    /** True when the page text contains {{MultiDevelopment}}. */
    def isMultiDevelopment

    /**
     * Maps a date to a wiki link pointing at the matching "last updated"
     * section of the report (anchors LessThan1DayAgo ... MoreThan3YearsAgo).
     *
     * The value of the `use` block is the closure's result. The former trailing
     * duration return was dropped: it either never ran or replaced the link.
     */
    def niceDate = { d ->
        use (TimeCategory) {
            if (d.after(1.day.ago)) return "[[#LessThan1DayAgo|< 1 day ago]]"
            else if (d.after(1.week.ago)) return "[[#LessThan1WeekAgo|< 1 week ago]]"
            else if (d.after(1.month.ago)) return "[[#LessThan1MonthAgo|< 1 month ago]]"
            else if (d.after(6.months.ago)) return "[[#LessThan6MonthsAgo|< 6 months ago]]"
            else if (d.after(1.year.ago)) return "[[#LessThan1YearAgo|< 1 year ago]]"
            else if (d.after(3.years.ago)) return "[[#LessThan3YearsAgo|< 3 years ago]]"
            else return "[[#MoreThan3YearsAgo|> 3 years ago]]"
        }
    }

    /** Table row whose date cell links to the page's age bucket. */
    public String toString() {
        renderRow("''${niceDate(lastUpdated)}''")
    }

    /** Table row whose date cell shows the revision date as 'dd MMM yy'. */
    public String toStringWithDate() {
        renderRow("''${lastUpdated.format('dd MMM yy')}''")
    }

    /** Shared row markup: title (+ stub marker), status link, date cell, category links. */
    private String renderRow(dateCell) {
        def stubMarker = isDevelopmentStub ? " (Stub)" : ""
        def statusLabel = status.capitalize()
        def categoryLinks = categories.collect { "[[#${it}|${it}]]" }.join(', ')
        "<tr><td>'''[[${title}]]${stubMarker}'''</td><td>[[#${statusLabel}|${statusLabel}]]</td><td>${dateCell}</td><td>${categoryLinks}</td></tr>"
    }
}
// Build a Development record for every exported page that uses the
// Development template and file it into the status/category/age indexes.
// NOTE(review): closing braces, the guard bodies and the counter increments
// were missing in the source and are reconstructed here — confirm.
pages.each { page ->
    // Category pages are skipped (`return` in an `each` closure moves on to the next page).
    if (page.title.text().startsWith("Category:")) {
        return
    }
    def revision = page.revision
    def text = revision.text.text()
    // Pages without the Development template are skipped as well.
    if (!text.contains("{{Development\n")) {
        return
    }
    taggedAsDevelopmentCount++

    def development = new Development()
    development.title = page.title.text()
    development.isDevelopmentStub = text.contains("{{Development:Stub}}")
    development.isMultiDevelopment = text.contains("{{MultiDevelopment}}")
    // The trailing 'Z' is matched as a literal, so the timestamp is read in the
    // JVM's default time zone.
    development.lastUpdated = Date.parse('yyyy-MM-dd\'T\'HH:mm:ss\'Z\'', revision.timestamp.text())

    def statusMatcher = text =~ "^/|status = (.*)\n"
    def basedOnMatcher = text =~ "^/|reprap = (.*)\n"
    def categoriesMatcher = text =~ "^/|categories = (.*)\n"

    // The capture group is null when the "^/" alternative matched and empty when
    // the parameter has no value; both cases fall back to "undefined".
    def statusValue = statusMatcher ? statusMatcher[0][1] : null
    development.status = statusValue ?: "undefined"

    // A "?" placeholder is treated like a missing value.
    def basedOnValue = basedOnMatcher ? basedOnMatcher[0][1] : null
    development.basedOn = (basedOnValue && !basedOnValue.equals('?')) ? basedOnValue : "undefined"

    if (development.isDevelopmentStub) {
        developmentStubCount++
    }
    if (development.isMultiDevelopment) {
        multiDevelopmentCount++
    }

    // Collect the category names from the [[Category:Name|sortkey]] links.
    development.categories = []
    if (categoriesMatcher) {
        def categoryMatcher = (categoriesMatcher[0][1] ?: '') =~ /\[\[Category:([.[^\]]]*)\]\]/
        categoryMatcher.each { categoryMatch ->
            // Keep only the part before an optional "|sortkey".
            def category = categoryMatch[1].split(/\|/)[0]
            development.categories << category
        }
    }

    developmentIndex << development

    // File the record under the first age bucket its last revision falls into.
    use (TimeCategory) {
        if (development.lastUpdated.after(1.day.ago)) lastUpdatedIndex['LessThan1DayAgo'] << development
        else if (development.lastUpdated.after(1.week.ago)) lastUpdatedIndex['LessThan1WeekAgo'] << development
        else if (development.lastUpdated.after(1.month.ago)) lastUpdatedIndex['LessThan1MonthAgo'] << development
        else if (development.lastUpdated.after(6.months.ago)) lastUpdatedIndex['LessThan6MonthsAgo'] << development
        else if (development.lastUpdated.after(1.year.ago)) lastUpdatedIndex['LessThan1YearAgo'] << development
        else if (development.lastUpdated.after(3.years.ago)) lastUpdatedIndex['LessThan3YearsAgo'] << development
        else lastUpdatedIndex['MoreThan3YearsAgo'] << development
    }

    development.categories.each { category ->
        if (!categoryIndex[category]) {
            categoryIndex[category] = []
        }
        categoryIndex[category] << development
    }
}
// Report header: page title, "generated page" warning, generation date,
// summary counters and the table of contents.
println "==Development Index Analysis=="
// NOTE(review): the wiki link target before "|script" is empty in the source;
// the real target cannot be recovered from this file, so the text is unchanged.
println "{{Notice|'''Warning: This page is generated from a [[|script]]. Please do not modify by hand as changes could be lost.'''}}"
// TimeCategory is only needed for the `1.second.ago` expression.
use (TimeCategory) {
    println "Page Generated: ${1.second.ago}"
}
println "<br />Development Pages: ${taggedAsDevelopmentCount}"
println "<br />Development Stubs: ${developmentStubCount}"
println "<br />Multi Development: ${multiDevelopmentCount}"
println "<br />Jump To: [[#ActiveDevelopments|Active Developments]]"
println "<hr>"
println "__TOC__"
// Orders records alphabetically by page title.
def titleComparator = { a, b -> a.title <=> b.title } as Comparator
// Orders records by last revision date, most recent first.
def ageComparator = { a, b -> b.lastUpdated <=> a.lastUpdated } as Comparator

// Opening markup of a result table: Title / Status / Last Updated / Categories.
def tableHeader = { "<table width=100% border=1 cellspacing=0 cellpadding=2><tr><th width=40%>Title</th><th width=10%>Status</th><th width=10%>Last Updated</th><th width=40%>Categories</th></tr>"}
// Closing markup of a result table.
def tableFooter = { "</table>"}

// Prints the given records as one table (one row per record via toString()).
// Nothing at all is printed for an empty list, so no stray header/footer appears.
def printDevelopments = { developments ->
    if (developments.size() > 0) {
        println tableHeader()
        developments.each { development ->
            println development
        }
        println tableFooter()
    }
}
// NOTE(review): in the source the Status, Category and Based On sections print
// only their headings, while printDevelopments and titleComparator are defined
// but never used. The table calls below are reconstructed on that basis — confirm.

//Status
println "==<div id=\"IndexByStatus\">Index By Status</div>=="
developmentIndex.groupBy { it.status.toLowerCase() }.each { status, developments ->
    println "===<div id=\"${status.capitalize()}\">${status.capitalize()} (${developments.size()})</div>==="
    printDevelopments(developments.sort(titleComparator))
}

//Category
println "==<div id=\"IndexByCategory\">Index By Category</div>=="
categoryIndex.each { cat, developments ->
    println "===<div id=\"${cat.capitalize()}\">${cat.capitalize()} (${developments.size()})</div>==="
    printDevelopments(developments.sort(titleComparator))
}

//Based On
println "==<div id=\"IndexByBasedOn\">Index By \"Based On\"</div>=="
developmentIndex.groupBy { it.basedOn.toLowerCase() }.each { basedOn, developments ->
    println "===<div id=\"${basedOn.capitalize()}\">${basedOn.capitalize()} (${developments.size()})</div>==="
    printDevelopments(developments.sort(titleComparator))
}

//Last Updated
// These rows show the concrete revision date, so the table is built here with
// toStringWithDate() and ordered most recent first.
println "==<div id=\"IndexByLastUpdated\">Index By \"Last Updated\" (ordered by descending last updated date)</div>=="
lastUpdatedIndex.each { lastUpdated, developments ->
    println "===<div id=\"${lastUpdated}\">${lastUpdated} (${developments.size()})</div>==="
    if (developments.size() > 0) {
        println tableHeader()
        developments.sort(ageComparator).each { development ->
            println development.toStringWithDate()
        }
        println tableFooter()
    }
}
// Selects the "active" developments. Rules are applied in this order:
//   1. status abandoned/obsolete      -> not active
//   2. status working                 -> active
//   3. development stub               -> not active
//   4. anything else                  -> active only if updated in the last 6 months
// The closure must open on the same line as findAll, otherwise Groovy does not
// pass it as the method's argument.
def activeDevelopments = developmentIndex.findAll { development ->
    def status = development.status.toLowerCase()
    if (['abandoned', 'obsolete'].contains(status)) return false
    if (['working'].contains(status)) return true
    if (development.isDevelopmentStub) return false
    //Other: experimental, undefined, concept, prototype, pre alpha, stable, design, implementation, unknown, just started, unproven, working on a prototype, startup phase
    use (TimeCategory) {
        development.lastUpdated.after(6.months.ago)
    }
}
println "==<div id=\"ActiveDevelopments\">Active Developments (${activeDevelopments.size()})</div>=="
println "Active is where:"
println "* Status is not Abandoned or Obsolete"
println "* Status is working"
println "* Page is not a development stub and Page was updated in the last 6 months"
