Last active
October 3, 2019 14:19
-
-
Save wswoodruff/4fc0cc48480aa2ae7ce7c7091bf1b476 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'use strict'; | |
const Fs = require('fs'); | |
const Util = require('util'); | |
const Xml2js = require('xml2js'); | |
const XmlNodes = require('xml-nodes'); | |
const Miss = require('mississippi'); | |
const internals = {}; | |
module.exports = async ({ | |
filePath, | |
fileExtension, | |
targetNode, | |
rootNode = 'root', | |
splitOnNumber = 400, | |
nodeFilter | |
}) => { | |
if (!filePath || !fileExtension) { | |
throw new Error('Invalid arguments: filePath, fileExtension are required'); | |
} | |
const { filterNodes } = internals; | |
const fileStream = Fs.createReadStream(`${filePath}.${fileExtension}`); | |
const xmlHead = '<?xml version="1.0" encoding="UTF-8"?>'; | |
let currentFile = ''; | |
let fileCount = 0; | |
let currentFileTargetNodeCount = 0; | |
const passThrough = (nodeAsStr, enc, next) => next(null, nodeAsStr); | |
const processNode = async (nodeAsStr, enc, next) => { | |
currentFile += nodeAsStr; | |
currentFileTargetNodeCount++; | |
if (currentFileTargetNodeCount < splitOnNumber) { | |
return next(); | |
} | |
currentFileTargetNodeCount = 0; | |
const newXmlFile = `${xmlHead}\n<${rootNode}>\n${currentFile}\n</${rootNode}>`; | |
currentFile = ''; | |
try { | |
await Util.promisify(Fs.writeFile)(`${filePath}-${fileCount++}.${fileExtension}`, newXmlFile); | |
next(); | |
} | |
catch (err) { | |
next(err); | |
} | |
}; | |
await Util.promisify(Miss.pipe)( | |
fileStream, | |
XmlNodes(targetNode), | |
nodeFilter ? Miss.through(filterNodes(nodeFilter)) : Miss.through(passThrough), | |
Miss.through(processNode) | |
); | |
// Create file for remainder | |
if (currentFileTargetNodeCount > 0) { | |
const newXmlFile = `${xmlHead}\n<${rootNode}>\n${currentFile}\n</${rootNode}>`; | |
await Util.promisify(Fs.writeFile)(`${filePath}-${fileCount++}.${fileExtension}`, newXmlFile); | |
} | |
return fileCount; | |
}; | |
/**
 * Builds a through-stream transform that keeps or drops XML nodes.
 *
 * Each incoming node string is parsed with xml2js and the parsed result is
 * handed to `filterFunc` (which may be async). A truthy answer forwards the
 * original, unparsed node string downstream; a falsy answer drops the node.
 * Parsing or filter errors are reported through `next(err)`.
 *
 * @param {Function} filterFunc - Predicate receiving the parsed node.
 * @returns {Function} Transform with the `(chunk, enc, next)` signature.
 */
internals.filterNodes = (filterFunc) => async (nodeAsStr, enc, next) => {

    // A new parser is constructed for every node.
    const parser = new Xml2js.Parser();

    try {
        const parsedNode = await parser.parseStringPromise(nodeAsStr);
        const shouldKeep = await filterFunc(parsedNode);

        // Let it pass on through, or not
        if (shouldKeep) {
            next(null, nodeAsStr);
        }
        else {
            next();
        }
    }
    catch (err) {
        next(err);
    }
};
Yoooo thanks for comments!
- Yeah we should probably validate that `targetNode` exists, I think `XmlNodes` will choke on weird or no input
- I was trying to only create a parser if the `filterNodes` was going to be used since it's optional, but yeah turns out I'm creating one per-node — lol oops! That doesn't seem right, I should fix that
- The nested `await` for `await filterFunc(await xmlParser.parseStringPromise(nodeAsStr))` — This behaves the same as creating an intermediate var for `await xmlParser.parseStringPromise(nodeAsStr)` on the line above and passing it to `filterFunc`
- Yeah — seems like I could break out a write file func that could be shared between the final file write and `processNode` — the file write line still needs to mutate `fileCount` which lives in the main exported scope so for now I think it should be declared in that scope — dunno how I feel about passing a var to a func that then mutates it but maybe that could work too if there was a comment, dunno not sure yet /shrug
werd, all makes sense. thanks for the explanations 🙏 🍷
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Yo! This is pretty cool. I've never had to work w/ XML before. Thanks for sharing 🙏
Couple of minor things that caught my eye:
- Do there need to be any assertions / validation on the `targetNode` argument? Or, more specifically, does `XmlNodes` choke on certain inputs we can anticipate?
- Is it necessary to create a new xml parser per node? Or could we reuse a "global" / shared one?
- `await filterFunc(await xmlParser.parseStringPromise(nodeAsStr))` !!!! I'd never seen this before, very cool. Does this behave any differently than if you called the await'd argument on a separate line, above the parsing call?
- This is nitpicky for sure, but something that I had to sit with for a bit when reading this: would it be possible to separate the file writing from the `processNode` func entirely? Again, a matter of preference, let alone if it's possible or not (I dunno how to use any of the tools at play here :) ), but figuring out how the file writing in the `processNode` block and how it relates to the remainder file creation at the end of the file was tricky for me, I think just b/c they weren't located together and that `processNode` seemed to have 2 responsibilities: breaking the XML stream into smaller chunks and writing those chunks to files. Could the file writing be relocated to and consolidated in another processing function, the final step in that pipe sequence, maybe, like accepting a file chunk output from the `processNode` function?