Created
May 17, 2011 23:07
-
-
Save garyhodgson/977622 to your computer and use it in GitHub Desktop.
Parses a mediawiki export and generates an analysis of content for RepRap Wiki
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env groovy | |
import groovy.time.* | |
// Export File from http://reprap.org/wiki/Special:Export, using category: "Development" | |
// MediaWiki XML export to analyse. The File itself is handed to XmlSlurper so the
// XML parser decodes it using the encoding declared in the document, instead of
// first reading it into a String with the platform default charset.
def f2 = new File('RepRapWiki-DevelopmentPages_LatestRevisions.xml')
def records = new XmlSlurper().parse(f2)
def pages = records.page

// Running totals printed in the report summary.
def multiDevelopmentCount = 0
def developmentStubCount = 0
def taggedAsDevelopmentCount = 0

// Every Development found, in the order the pages appear in the export.
def developmentIndex = []
// Category name -> list of Developments; a TreeMap so categories iterate sorted by name.
def categoryIndex = new TreeMap()
// Age bucket name -> list of Developments; the literal's key order is the iteration order.
def lastUpdatedIndex = [
    'LessThan1DayAgo'   : [],
    'LessThan1WeekAgo'  : [],
    'LessThan1MonthAgo' : [],
    'LessThan6MonthsAgo': [],
    'LessThan1YearAgo'  : [],
    'LessThan3YearsAgo' : [],
    'MoreThan3YearsAgo' : []
]
/**
 * One wiki page tagged with the Development template, together with the
 * wiki-markup table rows used to list it in the generated report.
 */
class Development
{
    def title               // page title; rendered as a wiki link
    def lastUpdated         // Date of the page's latest revision
    def basedOn             // value of the template's "reprap" parameter, or "undefined"
    def categories          // list of category names
    def status              // value of the template's "status" parameter, or "undefined"
    def isDevelopmentStub
    def isMultiDevelopment

    /**
     * Maps a date to a wiki link that points at the matching age-bucket anchor
     * (LessThan1DayAgo ... MoreThan3YearsAgo). The value of the use-block is the
     * closure's return value.
     */
    def niceDate = { d ->
        use (TimeCategory) {
            if (d.after(1.day.ago)) return "[[#LessThan1DayAgo|< 1 day ago]]"
            else if (d.after(1.week.ago)) return "[[#LessThan1WeekAgo|< 1 week ago]]"
            else if (d.after(1.month.ago)) return "[[#LessThan1MonthAgo|< 1 month ago]]"
            else if (d.after(6.months.ago)) return "[[#LessThan6MonthsAgo|< 6 months ago]]"
            else if (d.after(1.year.ago)) return "[[#LessThan1YearAgo|< 1 year ago]]"
            else if (d.after(3.years.ago)) return "[[#LessThan3YearsAgo|< 3 years ago]]"
            else return "[[#MoreThan3YearsAgo|> 3 years ago]]"
        }
    }

    /** Table row whose "Last Updated" cell is a link to the page's age bucket. */
    public String toString()
    {
        row(niceDate(lastUpdated))
    }

    /** Table row whose "Last Updated" cell is the revision date formatted as dd MMM yy. */
    public String toStringWithDate()
    {
        row(lastUpdated.format('dd MMM yy'))
    }

    /**
     * Builds the four-cell row shared by both renderings: bold linked title (with a
     * stub marker), status link, italic date cell, and comma-separated category links.
     */
    private String row(dateCell)
    {
        def stubMarker = isDevelopmentStub ? " (Stub)" : ""
        def statusLabel = status.capitalize()
        def categoryLinks = categories.collect { "[[#${it}|${it}]]" }.join(', ')
        """<tr><td>'''[[${title}]]${stubMarker}'''</td><td>[[#${statusLabel}|${statusLabel}]]</td><td>''${dateCell}''</td><td>${categoryLinks}</td></tr>"""
    }
}
// Reads one "|key = value" parameter out of a page's wiki text.
// The pipe is matched literally; spaces/tabs around the key and "=" are tolerated.
// Returns the trimmed value, or '' when the parameter is absent or blank.
def templateParam = { String wikiText, String key ->
    def matcher = wikiText =~ ('(?m)\\|[ \\t]*' + key + '[ \\t]*=[ \\t]*(.*)$')
    matcher.find() ? matcher.group(1).trim() : ''
}

// Revision timestamps carry a literal trailing 'Z', so they are parsed as UTC
// rather than in the JVM's default time zone.
def timestampFormat = new java.text.SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'")
timestampFormat.timeZone = TimeZone.getTimeZone('UTC')

// Walk every exported page, keep those using the Development template, and file
// each one into the overall, age-bucket and category indexes.
pages.each { page ->
    // "return" inside the closure skips to the next page.
    if (page.title.text().startsWith("Category:")) {
        return
    }
    def revision = page.revision
    def text = revision.text.text()
    if (!text.contains("{{Development\n")) {
        return
    }
    taggedAsDevelopmentCount++

    def development = new Development()
    development.title = page.title.text()
    development.isDevelopmentStub = text.contains("{{Development:Stub}}")
    development.isMultiDevelopment = text.contains("{{MultiDevelopment}}")
    development.lastUpdated = timestampFormat.parse(revision.timestamp.text())

    // Missing or blank parameters fall back to "undefined"; "?" counts as no answer for reprap.
    def status = templateParam(text, 'status')
    development.status = status ?: "undefined"
    def basedOn = templateParam(text, 'reprap')
    development.basedOn = (basedOn && !basedOn.equals('?')) ? basedOn : "undefined"

    if (development.isDevelopmentStub)
    {
        developmentStubCount++
    }
    if (development.isMultiDevelopment)
    {
        multiDevelopmentCount++
    }

    // Collect each [[Category:Name]] or [[Category:Name|sort key]] link; only Name is kept.
    development.categories = []
    (templateParam(text, 'categories') =~ /\[\[Category:([^\]]*)\]\]/).each { categoryMatch ->
        development.categories << categoryMatch[1].split(/\|/)[0]
    }

    developmentIndex << development

    // File the page under the first (youngest) age bucket it qualifies for.
    use (TimeCategory) {
        if (development.lastUpdated.after(1.day.ago)) lastUpdatedIndex['LessThan1DayAgo'] << development
        else if (development.lastUpdated.after(1.week.ago)) lastUpdatedIndex['LessThan1WeekAgo'] << development
        else if (development.lastUpdated.after(1.month.ago)) lastUpdatedIndex['LessThan1MonthAgo'] << development
        else if (development.lastUpdated.after(6.months.ago)) lastUpdatedIndex['LessThan6MonthsAgo'] << development
        else if (development.lastUpdated.after(1.year.ago)) lastUpdatedIndex['LessThan1YearAgo'] << development
        else if (development.lastUpdated.after(3.years.ago)) lastUpdatedIndex['LessThan3YearsAgo'] << development
        else lastUpdatedIndex['MoreThan3YearsAgo'] << development
    }

    development.categories.each { category ->
        if (!categoryIndex[category])
        {
            categoryIndex[category] = []
        }
        categoryIndex[category] << development
    }
}
// Report preamble: title, do-not-edit notice, generation time, summary counts and
// the table-of-contents marker.
println "==Development Index Analysis=="
// The script URL is an external link, so it uses single-bracket "[url label]" syntax.
println "{{Notice|'''Warning: This page is generated from a [https://gist.github.com/977622 script]. Please do not modify by hand as changes could be lost.'''}}"
println "Page Generated: ${new Date()}"
println "<br />Development Pages: ${taggedAsDevelopmentCount}"
println "<br />Development Stubs: ${developmentStubCount}"
println "<br />Multi Development: ${multiDevelopmentCount}"
println "<br />Jump To: [[#ActiveDevelopments|Active Developments]]"
println "<hr>"
println "__TOC__"
// Ascending by page title.
def titleComparator = { left, right -> left.title <=> right.title } as Comparator
// Descending by last revision date (newest first).
def ageComparator = { older, newer -> newer.lastUpdated <=> older.lastUpdated } as Comparator

// Opening tag and heading row of a report table.
def tableHeader = {
    '<table width=100% border=1 cellspacing=0 cellpadding=2><tr><th width=40%>Title</th><th width=10%>Status</th><th width=10%>Last Updated</th><th width=40%>Categories</th></tr>'
}
// Closing tag of a report table.
def tableFooter = { '</table>' }

// Prints the given developments as one table, one row each via their toString();
// prints nothing at all for an empty list.
def printDevelopments = { developments ->
    if (developments.size() == 0)
    {
        return
    }
    println tableHeader()
    for (entry in developments)
    {
        println entry
    }
    println tableFooter()
}
// Prints one sub-section per map entry: an anchored heading built from the
// capitalised key with the member count, followed by the members sorted by title.
def printGroupedSections = { groups ->
    groups.each { key, members ->
        def heading = key.capitalize()
        println "===<div id=\"${heading}\">${heading} (${members.size()})</div>==="
        printDevelopments(members.sort(titleComparator))
    }
}

// Status (grouped case-insensitively)
println '==<div id="IndexByStatus">Index By Status</div>=='
printGroupedSections(developmentIndex.groupBy { it.status.toLowerCase() })

// Category (in the category index's own iteration order)
println '==<div id="IndexByCategory">Index By Category</div>=='
printGroupedSections(categoryIndex)

// Based On (grouped case-insensitively)
println '==<div id="IndexByBasedOn">Index By "Based On"</div>=='
printGroupedSections(developmentIndex.groupBy { it.basedOn.toLowerCase() })
// Last Updated: one sub-section per age bucket, rows newest-first and showing the
// formatted revision date instead of the bucket link.
println '==<div id="IndexByLastUpdated">Index By "Last Updated" (ordered by descending last updated date)</div>=='
for (bucket in lastUpdatedIndex)
{
    def bucketName = bucket.key
    def members = bucket.value
    println "===<div id=\"${bucketName}\">${bucketName} (${members.size()})</div>==="
    // Empty buckets get a heading but no table.
    if (members.size() == 0)
    {
        continue
    }
    println tableHeader()
    members.sort(ageComparator).each { println it.toStringWithDate() }
    println tableFooter()
}
//Active
// Cutoff computed once so every page is compared against the same instant.
def activeCutoff = use (TimeCategory) { 6.months.ago }

// A development is active when it is not abandoned/obsolete and is either marked
// "working" or is a non-stub page revised after the cutoff.
def activeDevelopments = developmentIndex.findAll { development ->
    def status = development.status.toLowerCase()
    if (status in ['abandoned', 'obsolete']) return false
    if (status == 'working') return true
    if (development.isDevelopmentStub) return false
    //Other: experimental, undefined, concept, prototype, pre alpha, stable, design, implementation, unknown, just started, unproven, working on a prototype, startup phase
    return development.lastUpdated.after(activeCutoff)
}

println "==<div id=\"ActiveDevelopments\">Active Developments (${activeDevelopments.size()})</div>=="
// The criteria are printed as a nested list so the and/or structure of the filter is explicit.
println "Active is where:"
println "* Status is not Abandoned or Obsolete, and either:"
println "** Status is Working, or"
println "** Page is not a development stub and Page was updated in the last 6 months"
printDevelopments(activeDevelopments.sort(ageComparator))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment