@lstroud
Created April 25, 2014 19:29
This script will convert an OPML feed list between file formats (XML, JSON, and CSV), scrub the list for feeds that are still active, filter the list by expression, and de-duplicate the feeds in the list. I built it because I needed to reorganize my feeds. Moving the feeds to a CSV file, where the tag column is the folder(s), made it very quick …
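A typical invocation (hypothetical file names; the options are defined in the CliBuilder block below) might look like:

groovy scrub_opml.groovy -a -d -s -i XML -t CSV -o feeds.csv feeds.opml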
/*
Copyright 2014 Les Stroud
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
@Grab(group='org.codehaus.gpars', module='gpars', version='1.1.0')
@Grab(group='org.codehaus.groovy.modules.http-builder', module='http-builder', version='0.7' )
@Grab(group='net.sourceforge.nekohtml', module='nekohtml', version='1.9.20')
@Grab(group='net.sf.opencsv', module='opencsv', version='2.3')
import groovyx.net.http.HTTPBuilder
import groovyx.net.http.ContentType
import groovyx.net.http.Method
import groovyx.net.http.Status
import groovyx.gpars.GParsPool
import org.cyberneko.html.parsers.SAXParser
import groovy.xml.XmlUtil
import org.xml.sax.SAXException
import groovy.xml.MarkupBuilder
import groovy.json.JsonBuilder
import groovy.json.JsonSlurper
import au.com.bytecode.opencsv.CSVReader
import au.com.bytecode.opencsv.CSVWriter
import groovy.json.JsonOutput
rootFolder = null;
currentFolder = null;
count = [
active_count:0,
inactive_count:0,
total_count:0
]
feed_count = 0;
active_summary = [];
def cli = new CliBuilder(usage: 'groovy scrub_opml.groovy [-h] [-ads] [-f <expression closure>] [-i <inputformat>] [-t <outputformat>] [-o <outputfile_path>] <inputfile_path>')
cli.with {
h longOpt: 'help', 'Show usage information'
a longOpt: 'active', 'Filter for active feeds'
i longOpt: 'informat', args:1, argName:'in-format', 'Input file format. Valid values (XML, JSON, CSV)'
t longOpt: 'outformat', args:1, argName:'out-format','Output file format. Valid values (XML, JSON, CSV)'
o longOpt: 'outfile', args:1, argName:'out-path', 'Output file path.'
d longOpt: 'dedup', 'Deduplicate the feeds.'
s longOpt: 'stats', 'Print stats.'
f longOpt: 'filter', args:1, argName:'filter-exp', 'Filter feeds by closure (closure must be quoted and evaluatable) [ -f "{feed -> return feed.name.startsWith(\'CSS\')}" ]'
}
filter_active = false
dedup = false
informat = null
outformat = null
input_file = null
output_file = null
print_stats = false
filter = null
if(!args){
cli.usage()
System.exit(1)
}
def options = cli.parse(args)
if (options.h) {
cli.usage()
System.exit(0)
}
if(options.a)
filter_active = true
if(options.d)
dedup = true
if(options.s)
print_stats = true
if(options.f){
filter = Eval.me(options.f)
}
if(options.i){
informat = OPMLModel.FORMAT.valueOf(options.i)
} else {
informat = OPMLModel.FORMAT.XML
}
if(options.t){
outformat = OPMLModel.FORMAT.valueOf(options.t)
} else {
outformat = informat
}
if(options.o){
try{
output_file = new File(options.o)
} catch (t){
t.printStackTrace()
cli.usage()
System.exit(1)
}
}
def extraArguments = options.arguments()
if(extraArguments){
try{
input_file = new File(extraArguments[0])
} catch (t){
t.printStackTrace()
cli.usage()
System.exit(1)
}
}
def main(){
def opmlModel = OPMLModel.deserialize(informat, input_file)
if(print_stats)
println JsonOutput.prettyPrint(JsonOutput.toJson(opmlModel.stats))
if(filter_active){
// set the progress bar denominator before the (slow) per-feed network check
feed_count = opmlModel.stats.feed_count
opmlModel = opmlModel.filterBy(isActive)
println "After Active Filter"
println JsonOutput.prettyPrint(JsonOutput.toJson(opmlModel.stats))
}
if(filter){
opmlModel = opmlModel.filterBy(filter)
println "After Filter ${filter}"
println JsonOutput.prettyPrint(JsonOutput.toJson(opmlModel.stats))
}
if(dedup){
opmlModel = opmlModel.deDup()
println "After DeDuplication"
println JsonOutput.prettyPrint(JsonOutput.toJson(opmlModel.stats))
}
def output = opmlModel.serialize(outformat)
if(output_file){
output_file.text = output
} else {
println output
}
}
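// isActive: fetch a feed over HTTP (retrying once on transient failures) and treat it
// as active if any <pubDate> in the response falls within the last year.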
year_ago = new Date() - 365;
isActive = {feed ->
//def content = feed.url.toURL().text
def content = ""
def active = false
def tryagain = false
def error_msg = null
def _pubdates = []
try {
//def http = new HTTPBuilder(feed.url)
//def rss = http.get([contentType: groovyx.net.http.ContentType.XML])
//def content = feed.url.toURL().getText([connectTimeout:5000, readTimeout:20000])
def resp = new HTTPBuilder().request(feed.url, Method.GET, ContentType.TEXT) { req ->
headers.Accept = 'application/rss+xml, application/rdf+xml, application/xml, text/xml'
response.success = { r, reader ->
content = reader.text
return r
}
def unrecoverable = { r ->
throw new RuntimeException(r.statusLine as String)
}
response.'404' = unrecoverable
response.'500' = unrecoverable
response.'403' = unrecoverable
response.failure = { r ->
tryagain = true;
return r
}
}
if(tryagain){
tryagain = false
resp = new HTTPBuilder().request(feed.url, Method.GET, ContentType.TEXT) { req ->
headers.Accept = 'application/rss+xml, application/rdf+xml, application/xml, text/xml'
response.success = { r, reader ->
content = reader.text
return r
}
response.failure = { r -> throw new RuntimeException(r.statusLine as String)}
}
}
if(Status.SUCCESS.matches(resp.status)){
//feeds were too inconsistent and I didn't really need it to be parsed
//def rss = new XmlSlurper().parseText(content)
//println XmlUtil.serialize(rss)
//active = (rss.channel.item.pubDate.find{(new Date(it.text())) > year_ago})
def m = content =~ /pubDate>(.*)<\/pubDate/
m.each{match ->
def _pubdate
try{
_pubdate = new Date(match[1])
_pubdates << _pubdate
} catch(t){;;}
if(_pubdate && _pubdate > year_ago)
active = true
}
}
} catch (IOException ioe){
error_msg = "ERROR checking: ${feed.name} ${feed.url}"
def sw = new StringWriter()
def pw = new PrintWriter(sw)
ioe.printStackTrace(pw)
error_msg += "\n" + sw.toString()
active = false
} catch (SAXException se){
error_msg = "ERROR checking: ${feed.url} - [Parse Error - assuming active]"
active = true
} catch (t){
error_msg = "ERROR checking: ${feed.name} ${feed.url} - [${t.message}]"
active = false
}
count.total_count++
if(active)
count.active_count++
else
count.inactive_count++
printProgBar((count.total_count/feed_count)*100 as int)
active_summary << "${feed.name} [${(active)?'Active':'Inactive'}] - Latest pub date: ${_pubdates.max() as String}"
return active
}
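// In-memory model of the feed list: a root Folder whose children are Feeds and nested Folders.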
class OPMLModel {
static enum FORMAT {
XML, JSON, CSV
}
def root;
OPMLModel(){
root = new Folder(name:'Root')
}
static OPMLModel deserialize(FORMAT format, File file){
switch(format){
case FORMAT.XML:
return XMLConverter.deserialize(file);
case FORMAT.JSON:
return JSONConverter.deserialize(file);
case FORMAT.CSV:
return CSVConverter.deserialize(file);
}
}
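// De-duplicate feeds, keyed by URL. The first copy of a feed keeps its folder;
// a later copy only contributes its folder when the first one had none.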
def deDup(){
def feed_map = [:]
deDupFeeds(this.root.children, null, feed_map)
//println JsonOutput.prettyPrint(JsonOutput.toJson(feed_map))
def folder_hash = [:]
OPMLModel model = new OPMLModel()
feed_map.each{ k,v ->
if(v.folders){
v.folders.each { t ->
def _t = t.trim()
if(!folder_hash[_t])
folder_hash[_t] = new Folder(name:_t)
folder_hash[_t].children << v.feed
}
} else {
model.root.children << v.feed
}
}
folder_hash.each{ k,v ->
model.root.children << v
}
return model
}
private deDupFeeds(nodes, parent, feed_map){
nodes.each{ node ->
if(node instanceof Feed){
if(!feed_map[node.url]){
def _folders = []
if(parent)
_folders = [parent.name]
feed_map[node.url] = [feed: node, folders: _folders]
}
else{
if(feed_map[node.url].folders){
//println "Feed: ${node.name} already exists in folder(s) ${feed_map[node.url].folders}. Dropping this copy ${node.toString()} in folder ${parent.toString()}"
;;
} else {
if(parent){
feed_map[node.url].folders << parent.name
}
}
}
} else if(node instanceof Folder) {
deDupFeeds(node.children, node, feed_map)
}
}
}
def getStats(){
def stats = [feed_count: 0, folder_count: 0, feeds_in_folders: 0]
getOPMLStats(this.root.children, null, stats)
return stats
}
private getOPMLStats(nodes, parent, stats){
nodes.each{ node ->
if(node instanceof Feed){
stats.feed_count++;
if(parent)
stats.feeds_in_folders++;
} else if(node instanceof Folder) {
stats.folder_count++;
getOPMLStats(node.children, node, stats)
}
}
}
def OPMLModel filterBy(predicate){
def model = new OPMLModel();
model.root = this.root.filterBy(predicate)
return model
}
def String serialize(FORMAT format){
switch(format){
case FORMAT.XML:
return XMLConverter.serialize(this);
case FORMAT.JSON:
return JSONConverter.serialize(this);
case FORMAT.CSV:
return CSVConverter.serialize(this);
}
}
}
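// OPML XML <-> model. An <outline> element without an xmlUrl attribute is treated as a folder; otherwise it is a feed.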
class XMLConverter {
static OPMLModel deserialize(File file){
def model = new OPMLModel();
def records = new XmlSlurper().parseText(file.text)
model.root.children = buildFolder(records.body.outline)
return model
}
static String serialize(OPMLModel model){
def writer = new StringWriter()
def xml = new MarkupBuilder(writer)
xml.opml(version:'1.0'){
head{
title 'RSS Feeds'
}
body{
for(f in model.root.children){
if(f instanceof Feed){
outline(text: f.name, title:f.title, url:f.url, xmlUrl:f.url)
} else if(f instanceof Folder) {
serializeFolder(xml, f)
}
}
}
}
return writer.toString()
}
private static serializeFolder(builder, folder){
builder.outline(text: folder.name){
for(f in folder.children){
if(f instanceof Feed){
outline(text: f.name, title:f.title, url:f.url, xmlUrl:f.url)
} else if(f instanceof Folder) {
serializeFolder(builder, f)
}
}
}
}
private static buildFolder(elements){
def feed_list = [];
//println "building elements ${elements.size()}"
elements.each{ element ->
if(!element.'@xmlUrl'.text()){ //is a folder
//println "Found folder ${element.'@text'} with ${element.outline.size()} children."
def folder = new Folder(name:element.'@text'.text())
folder.children = buildFolder(element.outline);
feed_list << folder
} else {
feed_list << new Feed(name:element.'@text'.text(), title:element.'@title'.text(), url:element.'@xmlUrl'.text() )
}
}
return feed_list
}
}
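// JSON <-> model. An element without a url is treated as a folder with children; otherwise it is a feed.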
class JSONConverter {
static OPMLModel deserialize(File file){
def model = new OPMLModel();
def json = new JsonSlurper().parseText(file.text)
model.root.children = buildFolder(json)
return model
}
static String serialize(OPMLModel model){
return JsonOutput.prettyPrint(JsonOutput.toJson(model.root.children))
}
private static buildFolder(elements){
def feed_list = [];
//println "building elements ${elements.size()}"
elements.each{ element ->
if(!element.url){ //is a folder
def folder = new Folder(name:element.name)
folder.children = buildFolder(element.children);
feed_list << folder
} else {
feed_list << new Feed(name:element.name, title:element.title, url:element.url ) // JSON from serialize() uses 'name', not 'text'
}
}
return feed_list
}
}
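// CSV <-> model. Columns are Name, Title, Url, Tags; Tags holds comma-separated folder names, and a feed is placed in each listed folder on import.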
class CSVConverter {
static OPMLModel deserialize(File file){
StringReader sr = new StringReader(file.text)
CSVReader reader = new CSVReader(sr, ',' as char, '"' as char, 1)
def folder_hash = [:]
OPMLModel model = new OPMLModel()
reader.readAll().each{ row ->
def _name, _title, _url, _tags
_name = row[0]
_title = row[1]
_url = row[2]
_tags = row[3]
if(_tags){
_tags.split(',').each { t ->
def _t = t.trim()
if(!folder_hash[_t])
folder_hash[_t] = new Folder(name:_t)
folder_hash[_t].children << new Feed(name:_name, title:_title, url:_url)
}
} else {
model.root.children << new Feed(name:_name, title:_title, url:_url)
}
}
folder_hash.each{ k,v ->
model.root.children << v
}
return model
}
static String serialize(OPMLModel model){
def model_list = []
String[] header = ["Name", "Title", "Url", "Tags"]
model_list << header
flatten(model.root.children, null, model_list)
StringWriter sw = new StringWriter()
CSVWriter writer = new CSVWriter(sw, ',' as char, '"' as char)
model_list.each{ String[] row ->
writer.writeNext(row)
}
return sw.toString()
}
private static List flatten(nodes, parent, node_list){
nodes.each{ node ->
if(node instanceof Feed){
def node_tags = ""
if(parent)
node_tags = parent.name
String[] flatnode = [node.name, node.title, node.url, node_tags]
node_list << flatnode
} else if(node instanceof Folder) {
flatten(node.children, node, node_list)
}
}
}
}
class Folder {
String name
def children = []
def filterBy(predicate){
def filtered = []
//GParsPool.withPool(8){
children.each{c ->
def result = c.filterBy(predicate)
if(result)
filtered << result
}
//}
if(filtered){
def _filteredFolder = new Folder(name:name);
_filteredFolder.children = filtered;
return _filteredFolder;
} else {
return null;
}
}
def String toString(){
return "Folder: ${name} Child Count: ${children.size()}"
}
}
class Feed {
String name
String title
String url
//int article_count = 0
def filterBy(predicate){
if(predicate(this))
return this
else
return null
}
def String toString(){
return "Feed Name: ${name} Title: ${title} Url:${url}"
}
}
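// Render a 50-character console progress bar with running active|inactive counts.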
def void printProgBar(int percent){
StringBuilder bar = new StringBuilder("[");
for(int i = 0; i < 50; i++){
if( i < (percent/2)){
bar.append("=");
}else if( i == (percent/2)){
bar.append(">");
}else{
bar.append(" ");
}
}
bar.append("] " + percent + "% ");
bar.append(" ${count.active_count}|${count.inactive_count} ")
System.out.print("\r" + bar.toString());
}
main()