Created
September 24, 2014 05:30
-
-
Save wstrange/818eec424d0b9d89cfbe to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Bert's special parser: prints each CSV row's question and the plain text of its HTML answer.
import "package:html5lib/parser.dart" as html; | |
import 'package:html5lib/dom.dart'; | |
import "dart:convert"; | |
import "dart:io"; | |
import "package:csvparser/csvparser.dart"; | |
/// Recursively prints the text content of the element tree rooted at [e].
///
/// Only elements without child elements produce output (their `text` is
/// printed). An element that has child elements prints nothing itself and
/// delegates to each child in order.
flatten(Element e) {
  var kids = e.children;
  if (kids.isNotEmpty) {
    // Interior node: descend into every child element.
    for (var kid in kids) {
      flatten(kid);
    }
  } else {
    // Terminal node: emit its text.
    print(e.text);
  }
}
/// Reads `/tmp/test.csv` and, for every row, prints the question (3rd field)
/// followed by the text extracted from the HTML answer (4th field).
///
/// Fields are separated by `;`. Rows with fewer than four fields (for example
/// a blank line) are reported on stderr and skipped, rather than aborting the
/// whole run with a [RangeError].
main() {
  var f = new File('/tmp/test.csv');
  // allowMalformed: invalid UTF-8 sequences are replaced instead of throwing,
  // so a partially corrupt file can still be processed.
  var content =
      f.readAsStringSync(encoding: new Utf8Codec(allowMalformed: true));
  // NOTE: `seperator` (sic) is kept exactly as in the original call.
  var parser = new CsvParser(content, seperator: ';');
  while (parser.moveNext()) {
    var fields = parser.getLineAsList(); // current line as a list of fields
    if (fields.length < 4) {
      // Not enough columns to hold both question and answer.
      stderr.writeln(
          'skipping line with ${fields.length} field(s): $fields');
      continue;
    }
    var question = fields[2]; // question is 3rd field
    var answer = fields[3]; // answer is 4th field
    print("question = $question"); // question is printed as-is
    // The answer is HTML: parse it and print the text of its leaf elements.
    for (var element in html.parse(answer).children) {
      flatten(element);
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment