Skip to content

Instantly share code, notes, and snippets.

@wstrange
Created September 24, 2014 05:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wstrange/818eec424d0b9d89cfbe to your computer and use it in GitHub Desktop.
Save wstrange/818eec424d0b9d89cfbe to your computer and use it in GitHub Desktop.
// Bert's special parser
import "package:html5lib/parser.dart" as html;
import 'package:html5lib/dom.dart';
import "dart:convert";
import "dart:io";
import "package:csvparser/csvparser.dart";
/// Recursively prints the text of the leaf elements at or beneath [e].
///
/// An element that has no child elements gets its text printed on a line of
/// its own. Any other element prints nothing itself and is descended into,
/// one child at a time, in list order.
///
/// NOTE(review): text that sits directly inside an element which also has
/// child elements (e.g. the "a " in `<p>a <b>b</b></p>`) looks like it is
/// never printed, because only childless elements produce output. Confirm
/// that this is intended.
flatten(Element e) {
  // Only terminal elements (those without children) produce output.
  if (e.children.isEmpty) {
    print(e.text);
    return;
  }
  for (var child in e.children) {
    flatten(child);
  }
}
/// Reads `/tmp/test.csv` and, for every row, prints the question field
/// followed by the text extracted from the HTML in the answer field.
///
/// The file is parsed with `;` as the field separator. Rows with fewer than
/// four fields are reported on stderr and skipped rather than aborting the
/// run with a [RangeError].
main() {
  var csvFile = new File('/tmp/test.csv');
  // allowMalformed: true makes the decoder tolerate invalid UTF-8 byte
  // sequences instead of throwing on them.
  var content = csvFile.readAsStringSync(
      encoding: new Utf8Codec(allowMalformed: true));
  // `seperator` is kept exactly as written: it is the named argument this
  // call passes to CsvParser (presumably the package's own spelling).
  var parser = new CsvParser(content, seperator: ';');
  while (parser.moveNext()) {
    var fields = parser.getLineAsList();
    // The question is the 3rd field and the answer the 4th, so a shorter
    // row (for example a blank line) cannot be indexed safely.
    if (fields.length < 4) {
      stderr.writeln(
          'skipping row with ${fields.length} field(s); expected at least 4');
      continue;
    }
    var question = fields[2];
    var answer = fields[3];
    // The question is printed as-is.
    print("question = $question");
    // The answer is HTML: parse it and print the text of its leaf elements.
    for (var element in html.parse(answer).children) {
      flatten(element);
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment