Skip to content

Instantly share code, notes, and snippets.

@attilam
Created July 1, 2019 07:15
Show Gist options
  • Save attilam/e7036ca29216eeab7cc614fb880bf503 to your computer and use it in GitHub Desktop.
Save attilam/e7036ca29216eeab7cc614fb880bf503 to your computer and use it in GitHub Desktop.
const Parser = require('fast-xml-parser')
const he = require('he')
const createCsvWriter = require('csv-writer').createObjectCsvWriter
const fs = require('fs')
// Load the Folkets sv->en dictionary XML and parse it into a plain object tree.
// The path is resolved relative to the current working directory.
const xmlPath = '../../../Downloads/folkets_sv_en_public.xml'
const xmlSource = fs.readFileSync(xmlPath, 'utf-8')

// HTML-entity decoders handed to the parser: one for attribute values and one
// for element text.
const decodeAttribute = raw => he.decode(raw, {isAttributeValue: true})
const decodeTagText = raw => he.decode(raw)

// fast-xml-parser settings. The same object is passed to both parsing stages.
const parserOptions = {
  // Attributes are kept (ignoreAttributes: false) and exposed under keys
  // carrying this prefix, e.g. '@_value'.
  attributeNamePrefix: '@_',
  attrNodeName: false, // default is 'false'
  textNodeName: '#text',
  ignoreAttributes: false,
  ignoreNameSpace: false,
  allowBooleanAttributes: false,
  parseNodeValue: true,
  parseAttributeValue: false,
  trimValues: true,
  cdataTagName: '__cdata', // default is 'false'
  cdataPositionChar: '\\c',
  localeRange: '', // To support non english character in tag/attribute values.
  parseTrueNumberOnly: false,
  attrValueProcessor: decodeAttribute, // default is a=>a
  tagValueProcessor: decodeTagText // default is a=>a
}

// Two-stage parse: build the traversal object, then convert it to JSON.
const parsedXml = Parser.convertToJson(
  Parser.getTraversalObj(xmlSource, parserOptions),
  parserOptions
)

// The <word> entries found under the root <dictionary> element.
const words = parsedXml.dictionary.word
// Normalise a parsed XML child to an array. With the parser's default array
// mode, an element that occurs once is returned as a single object, one that
// repeats is returned as an array, and an absent one is undefined.
const asArray = value => {
  if (value === undefined || value === null) return []
  return Array.isArray(value) ? value : [value]
}

/**
 * Build the CSV rows from the parsed <word> nodes.
 *
 * Each node contributes one {sv, en} record, where `sv` is the node's
 * '@_value' attribute and `en` is the '@_value' of its <translation>
 * child(ren). Several translations are joined with '; '. Translations that
 * have no value, or an empty one, are ignored, and a word left without any
 * usable translation is skipped entirely.
 *
 * @param {object|object[]|undefined} wordNodes - parsed <word> node(s)
 * @returns {{sv: string, en: string}[]} records ready for the CSV writer
 */
const buildCsvRecords = wordNodes => {
  const records = []
  asArray(wordNodes).forEach(word => {
    const sv = word['@_value']
    const en = asArray(word['translation'])
      .map(translation => translation['@_value'])
      // Drop missing/empty values so no record is written with an undefined
      // `en` column.
      .filter(value => typeof value === 'string' && value !== '')
      .join('; ')
    if (en !== '') records.push({sv, en})
  })
  return records
}

const csvWords = buildCsvRecords(words)
// Write the collected records to sven.csv (relative to the current working
// directory) with two columns, 'sv' and 'en'.
const csvWriter = createCsvWriter({
  path: 'sven.csv',
  header: [
    {id: 'sv', title: 'sv'},
    {id: 'en', title: 'en'}
  ]
})
csvWriter
  .writeRecords(csvWords)
  .then(() => console.log(`Wrote ${csvWords.length} CSV records.`))
  .catch(err => {
    // Without a handler a failed write is an unhandled rejection; report it
    // and make the process exit with a non-zero status instead.
    console.error('Failed to write sven.csv:', err)
    process.exitCode = 1
  })
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment