Skip to content

Instantly share code, notes, and snippets.

@garyhodgson
Created May 17, 2011 23:07
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save garyhodgson/977622 to your computer and use it in GitHub Desktop.
Save garyhodgson/977622 to your computer and use it in GitHub Desktop.
Parses a mediawiki export and generates an analysis of content for RepRap Wiki
#!/usr/bin/env groovy
import groovy.time.*
// Input: an export taken from http://reprap.org/wiki/Special:Export for the category "Development".
def f2 = new File('RepRapWiki-DevelopmentPages_LatestRevisions.xml')
def records = new XmlSlurper().parseText(f2.text)
def pages = records.page

// Running totals reported in the page header.
def multiDevelopmentCount = 0
def developmentStubCount = 0
def taggedAsDevelopmentCount = 0

// Every parsed Development, in export order.
def developmentIndex = []

// Category name -> list of Developments; a TreeMap keeps the categories sorted by name.
def categoryIndex = new TreeMap()

// Age bucket -> list of Developments; insertion order runs from newest bucket to oldest.
def lastUpdatedIndex = [
    'LessThan1DayAgo'   : [],
    'LessThan1WeekAgo'  : [],
    'LessThan1MonthAgo' : [],
    'LessThan6MonthsAgo': [],
    'LessThan1YearAgo'  : [],
    'LessThan3YearsAgo' : [],
    'MoreThan3YearsAgo' : []
]
/**
 * One wiki page carrying the Development template, together with the
 * metadata the report needs and the logic to render it as a table row.
 */
class Development
{
    def title
    def lastUpdated
    def basedOn
    def categories
    def status
    def isDevelopmentStub
    def isMultiDevelopment

    /**
     * Maps a date onto a wiki link pointing at the matching
     * "Index By Last Updated" section (ids such as LessThan1WeekAgo).
     * Kept as a closure property so existing callers are unaffected.
     */
    def niceDate = { d ->
        use (TimeCategory) {
            if (d.after(1.day.ago)) return "[[#LessThan1DayAgo|< 1 day ago]]"
            if (d.after(1.week.ago)) return "[[#LessThan1WeekAgo|< 1 week ago]]"
            if (d.after(1.month.ago)) return "[[#LessThan1MonthAgo|< 1 month ago]]"
            if (d.after(6.months.ago)) return "[[#LessThan6MonthsAgo|< 6 months ago]]"
            if (d.after(1.year.ago)) return "[[#LessThan1YearAgo|< 1 year ago]]"
            if (d.after(3.years.ago)) return "[[#LessThan3YearsAgo|< 3 years ago]]"
            // The closing "]]" was previously missing, which broke this link.
            return "[[#MoreThan3YearsAgo|> 3 years ago]]"
        }
    }

    /** Table row whose "Last Updated" cell links to the matching age bucket. */
    public String toString()
    {
        row(niceDate(lastUpdated))
    }

    /** Table row whose "Last Updated" cell shows the date formatted as 'dd MMM yy'. */
    public String toStringWithDate()
    {
        row(lastUpdated.format('dd MMM yy'))
    }

    /**
     * Renders the row shared by both string forms.
     *
     * Anchors are derived the same way the report derives its section ids:
     * statuses are grouped by lower-cased value and then capitalized, and
     * category sections use the capitalized category name. Using the same
     * transformation here keeps the in-page links resolvable for mixed-case
     * values such as "Pre Alpha".
     */
    private String row(lastUpdatedCell)
    {
        def stubMarker = isDevelopmentStub ? " (Stub)" : ""
        def statusAnchor = status.toLowerCase().capitalize()
        def categoryLinks = categories.collect { "[[#${it.capitalize()}|${it}]]" }.join(', ')
        "<tr><td>'''[[${title}]]${stubMarker}'''</td>" +
            "<td>[[#${statusAnchor}|${status.capitalize()}]]</td>" +
            "<td>''${lastUpdatedCell}''</td>" +
            "<td>${categoryLinks}</td></tr>"
    }
}
// Export timestamps end in a literal 'Z', i.e. they are UTC. Parse them as UTC
// explicitly so the age buckets are not shifted by the local timezone offset.
def timestampFormat = new java.text.SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'")
timestampFormat.setTimeZone(TimeZone.getTimeZone('UTC'))

// Returns the trimmed value of the "|name = value" template parameter found in
// wikiText, or '' when the parameter is absent. The pipe is escaped so it is a
// literal character (unescaped it turned the pattern into an alternation whose
// first branch has no capture group), and only spaces/tabs are skipped around
// '=' so an empty value cannot swallow the following line.
def templateParameter = { String wikiText, String name ->
    def matcher = wikiText =~ /\|[ \t]*${name}[ \t]*=[ \t]*(.*)/
    matcher.find() ? matcher.group(1).trim() : ''
}

// Walks every exported page, keeps those that use the Development template and
// files each one in the overall, per-category and per-age indexes.
pages.each { page ->
    def pageTitle = page.title.text()
    // Category pages describe the category itself, not a development.
    if (pageTitle.startsWith("Category:")) {
        return
    }
    def revision = page.revision
    def text = revision.text.text()
    if (!text.contains("{{Development\n")) {
        return
    }
    taggedAsDevelopmentCount++

    def development = new Development()
    development.title = pageTitle
    development.isDevelopmentStub = text.contains("{{Development:Stub}}")
    development.isMultiDevelopment = text.contains("{{MultiDevelopment}}")
    development.lastUpdated = timestampFormat.parse(revision.timestamp.text())

    // Missing or empty parameters fall back to "undefined"; a "based on" of
    // '?' is treated the same way.
    def statusValue = templateParameter(text, 'status')
    development.status = statusValue ?: "undefined"
    def basedOnValue = templateParameter(text, 'reprap')
    development.basedOn = (basedOnValue && basedOnValue != '?') ? basedOnValue : "undefined"

    if (development.isDevelopmentStub)
    {
        developmentStubCount++
    }
    if (development.isMultiDevelopment)
    {
        multiDevelopmentCount++
    }

    // Collect the names from every [[Category:Name]] or [[Category:Name|sortkey]]
    // link in the categories parameter; the part after '|' is dropped.
    development.categories = []
    def categoryMatcher = templateParameter(text, 'categories') =~ /\[\[Category:([^\]]*)\]\]/
    categoryMatcher.each { categoryMatch ->
        development.categories << categoryMatch[1].split(/\|/)[0]
    }
    // A category listed twice must not file the page twice in its index.
    development.categories.unique()

    developmentIndex << development

    // File the page in the first (most recent) age bucket it qualifies for.
    use (TimeCategory) {
        if (development.lastUpdated.after(1.day.ago)) lastUpdatedIndex['LessThan1DayAgo'] << development
        else if (development.lastUpdated.after(1.week.ago)) lastUpdatedIndex['LessThan1WeekAgo'] << development
        else if (development.lastUpdated.after(1.month.ago)) lastUpdatedIndex['LessThan1MonthAgo'] << development
        else if (development.lastUpdated.after(6.months.ago)) lastUpdatedIndex['LessThan6MonthsAgo'] << development
        else if (development.lastUpdated.after(1.year.ago)) lastUpdatedIndex['LessThan1YearAgo'] << development
        else if (development.lastUpdated.after(3.years.ago)) lastUpdatedIndex['LessThan3YearsAgo'] << development
        else lastUpdatedIndex['MoreThan3YearsAgo'] << development
    }

    development.categories.each { category ->
        if (!categoryIndex.containsKey(category))
        {
            categoryIndex[category] = []
        }
        categoryIndex[category] << development
    }
}
// Report header: title, hand-edit warning, generation date and summary counts.
println "==Development Index Analysis=="
// External links use single brackets and a space before the label; the
// double-bracket "[[url|label]]" form is internal-link syntax and does not
// render as a labelled link to the script.
println "{{Notice|'''Warning: This page is generated from a [https://gist.github.com/977622 script]. Please do not modify by hand as changes could be lost.'''}}"
use (TimeCategory) {
    println "Page Generated: ${1.second.ago}"
}
println "<br />Development Pages: ${taggedAsDevelopmentCount}"
println "<br />Development Stubs: ${developmentStubCount}"
println "<br />Multi Development: ${multiDevelopmentCount}"
println "<br />Jump To: [[#ActiveDevelopments|Active Developments]]"
println "<hr>"
// MediaWiki magic word that places the table of contents here.
println "__TOC__"
// Ascending by page title.
def titleComparator = { left, right -> left.title <=> right.title } as Comparator

// Descending by last-updated date, so the most recently edited page comes first.
def ageComparator = { left, right -> right.lastUpdated <=> left.lastUpdated } as Comparator

// Opening markup (with the column headings) and closing markup of a result table.
def tableHeader = {
    "<table width=100% border=1 cellspacing=0 cellpadding=2><tr><th width=40%>Title</th><th width=10%>Status</th><th width=10%>Last Updated</th><th width=40%>Categories</th></tr>"
}
def tableFooter = {
    "</table>"
}

// Prints the given developments as one table, one row each via their string
// form; prints nothing at all for an empty collection.
def printDevelopments = { developments ->
    if (developments.size() <= 0)
    {
        return
    }
    println tableHeader()
    for (development in developments)
    {
        println development
    }
    println tableFooter()
}
// Prints one sub-section per map entry: a heading whose id and label are the
// capitalized key followed by the member count, then the members ordered by title.
def printGroupedSections = { groups ->
    groups.each { key, members ->
        def heading = key.capitalize()
        println "===<div id=\"${heading}\">${heading} (${members.size()})</div>==="
        printDevelopments(members.sort(titleComparator))
    }
}

// Status (grouped case-insensitively)
println "==<div id=\"IndexByStatus\">Index By Status</div>=="
printGroupedSections(developmentIndex.groupBy { it.status.toLowerCase() })

// Category
println "==<div id=\"IndexByCategory\">Index By Category</div>=="
printGroupedSections(categoryIndex)

// Based On (grouped case-insensitively)
println "==<div id=\"IndexByBasedOn\">Index By \"Based On\"</div>=="
printGroupedSections(developmentIndex.groupBy { it.basedOn.toLowerCase() })

// Last Updated: one sub-section per age bucket, rows show the formatted date
// and are ordered newest first.
println "==<div id=\"IndexByLastUpdated\">Index By \"Last Updated\" (ordered by descending last updated date)</div>=="
lastUpdatedIndex.each { bucket, members ->
    println "===<div id=\"${bucket}\">${bucket} (${members.size()})</div>==="
    if (members.size() <= 0)
    {
        return
    }
    println tableHeader()
    for (member in members.sort(ageComparator))
    {
        println member.toStringWithDate()
    }
    println tableFooter()
}
// Active developments. The checks run in this order:
//   1. status abandoned/obsolete -> never active
//   2. status working            -> always active
//   3. development stub          -> not active
//   4. any other status (experimental, undefined, concept, prototype, pre alpha,
//      stable, design, implementation, unknown, just started, unproven, ...)
//      -> active only when the page was updated within the last 6 months
def inactiveStatuses = ['abandoned', 'obsolete']
def activeDevelopments = developmentIndex.findAll { candidate ->
    def normalizedStatus = candidate.status.toLowerCase()
    if (normalizedStatus in inactiveStatuses)
    {
        return false
    }
    if (normalizedStatus == 'working')
    {
        return true
    }
    if (candidate.isDevelopmentStub)
    {
        return false
    }
    use (TimeCategory) {
        candidate.lastUpdated.after(6.months.ago)
    }
}

println "==<div id=\"ActiveDevelopments\">Active Developments (${activeDevelopments.size()})</div>=="
[
    "Active is where:",
    "* Status is not Abandoned or Obsolete",
    "* Status is working",
    "* Page is not a development stub and Page was updated in the last 6 months"
].each { line -> println line }
printDevelopments(activeDevelopments.sort(ageComparator))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment