Last active
March 2, 2016 15:25
-
-
Save nathanchicken/a0a3624c6859260e8699 to your computer and use it in GitHub Desktop.
Parses a CSV and outputs successes and failures, based on sanitisation. Currently this is just checking for valid emails. This requires some set up and knowledge of your CSV's columns.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* -------------------- | |
* Configuration | |
* -------------------- | |
*/ | |
// CSV source | |
var sourceFile = "Agents_1000s.csv"; | |
// Where to save processed entries | |
var outputFileName = "agents_processed.csv"; | |
// Where to save the 'failures' | |
var errorsFileName = "agents_errors.csv"; | |
// CSV saving options. Below is configured to be comma delimited | |
// and use quotemarks to seperate columns, even if empty. It will | |
// also add in header labels eg "email","username" which is garnered | |
// from the object property names defined below | |
var csvOptions = { | |
delimiter : ',', | |
header : false, | |
quoted : true | |
}; | |
// MAPPING: | |
// Map your properties to your position in the array of the CSV you are importing | |
// | |
// EG: output.username = record[7] if username was the 8th column of the csv | |
var mapping = { | |
username : 7, | |
email : 2, | |
displayName : 0 | |
}; | |
// Add your defaults here, perhaps this could be modified to | |
// be functions that return strings eg random password generator | |
var defaultColumns = { | |
password : 'defaultPassword' | |
}; | |
/** | |
* -------------------- | |
* Modules | |
* -------------------- | |
*/ | |
var fs = require('fs'); | |
var parse = require('csv').parse; | |
var stringify = require('csv').stringify; | |
var colors = require('colors'); | |
var transform = require('stream-transform'); | |
/** | |
* -------------------- | |
* Code... | |
* --------------------- | |
*/ | |
// bug with the two processed files not being treated seperately. | |
var csvOptions2 = Object.assign({}, csvOptions); | |
// create a stream for our source CSV | |
var input = fs.createReadStream( sourceFile ); | |
// regexp found from the internet - this is just to check emails. | |
function isEmail(email){ | |
return /^([^\x00-\x20\x22\x28\x29\x2c\x2e\x3a-\x3c\x3e\x40\x5b-\x5d\x7f-\xff]+|\x22([^\x0d\x22\x5c\x80-\xff]|\x5c[\x00-\x7f])*\x22)(\x2e([^\x00-\x20\x22\x28\x29\x2c\x2e\x3a-\x3c\x3e\x40\x5b-\x5d\x7f-\xff]+|\x22([^\x0d\x22\x5c\x80-\xff]|\x5c[\x00-\x7f])*\x22))*\x40([^\x00-\x20\x22\x28\x29\x2c\x2e\x3a-\x3c\x3e\x40\x5b-\x5d\x7f-\xff]+|\x5b([^\x0d\x5b-\x5d\x80-\xff]|\x5c[\x00-\x7f])*\x5d)(\x2e([^\x00-\x20\x22\x28\x29\x2c\x2e\x3a-\x3c\x3e\x40\x5b-\x5d\x7f-\xff]+|\x5b([^\x0d\x5b-\x5d\x80-\xff]|\x5c[\x00-\x7f])*\x5d))*$/.test( email ); | |
} | |
// Create arrays to store processed data | |
// store processed in this one... | |
var processed = []; | |
// any errors in this one | |
var errors = []; | |
// create a CSV parser. This just tells us our source is comma delimited | |
var count = 0; | |
function findWhere(list, props) { | |
var idx = 0; | |
var len = list.length; | |
var match = false; | |
var item, item_k, item_v, prop_k, prop_val; | |
for (; idx<len; idx++) { | |
item = list[idx]; | |
for (prop_k in props) { | |
// If props doesn't own the property, skip it. | |
if (!props.hasOwnProperty(prop_k)) continue; | |
// If item doesn't have the property, no match; | |
if (!item.hasOwnProperty(prop_k)) { | |
match = false; | |
break; | |
} | |
if (props[prop_k] === item[prop_k]) { | |
// We have a match…so far. | |
match = true; | |
} else { | |
// No match. | |
match = false; | |
// Don't compare more properties. | |
break; | |
} | |
} | |
// We've iterated all of props' properties, and we still match! | |
// Return that item! | |
if (match) return item; | |
} | |
// No matches | |
return null; | |
} | |
// create a transform function to go through records | |
var transformer = function( record, callback ) { | |
// at this point record is an array of each column in this row. | |
// create a new object, merging in the default columns. | |
var obj = Object.assign({}, defaultColumns); | |
var fail = false; | |
var reason = ''; | |
// loop through our mapping | |
for (var prop in mapping) { | |
obj[prop] = record[ mapping[prop] ]; | |
// check for duplicates | |
(function() { | |
var checkObj = {}; | |
checkObj[ prop ] = record[ mapping[prop] ]; | |
var existing = findWhere( processed, checkObj ); | |
if ( existing ) { | |
// console.log( ( record[ mapping[prop] ] + " exists ").red ); | |
fail = true; | |
reason += "| " + prop + " exists "; | |
} | |
// now check in existing list | |
})(); | |
} | |
// EE needs long usernames | |
if ( obj.username.length < 4) { | |
fail = true; | |
reason += "| Username Too Short " | |
} | |
if ( obj.username === "naturalhighsafaris") { | |
fail = true; | |
reason += "| Username Too Short " | |
} | |
// run any sanitisation, change the output, or discard it.... | |
if ( !isEmail( obj.email ) ) | |
{ | |
fail = true; | |
reason += "| Email not valid "; | |
} | |
// run other sanitations here... | |
// Username | |
// we're not running callbacks at this point | |
// callback(); // ??? | |
if ( fail ) { | |
obj.reason = reason; | |
// console.log( obj ); | |
errors.push( obj ); | |
} else { | |
processed.push( obj ); | |
} | |
// console.log( obj ); | |
}; | |
// loop through our lines and process the data. | |
// save the CSVs when the read closes. | |
var readLines = function( ) { | |
var lineReader = require('readline').createInterface({ | |
input: require('fs').createReadStream( sourceFile ) | |
}); | |
lineReader.on('line', function (line) { | |
var row = parse( line, {}, function( err, output) { | |
transformer( output[0] ); | |
count++; | |
}); | |
}); | |
lineReader.on('close', function() { | |
stringify(processed, csvOptions, function(err, output){ | |
fs.writeFile( outputFileName, output, false, function() { | |
console.log( ("saved " + processed.length + " records in ").green + outputFileName.green ); | |
}); | |
}); | |
// Save the failures. | |
stringify(errors, csvOptions2, function(err, errOutput){ | |
fs.writeFile( errorsFileName, errOutput, false, function() { | |
console.log( ("saved " + errors.length + " records in ").red + errorsFileName.red ); | |
}); | |
}); | |
}); | |
}; | |
/* run */ | |
// do something here if you wanted to, say, open a member comparison file and run below as a callback. | |
readLines(); | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "csv", | |
"version": "1.0.0", | |
"description": "", | |
"main": "csv.js", | |
"dependencies": { | |
"stream-transform": "^0.1.1", | |
"csv": "^0.4.6", | |
"colors": "^1.1.2" | |
}, | |
"devDependencies": {}, | |
"scripts": { | |
"test": "echo \"Error: no test specified\" && exit 1" | |
}, | |
"author": "", | |
"license": "ISC" | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Previous version did not loop through all the data properly. This might be an issue with piping (which I need to learn more about), but it might also be some sort of issue with the stream just being "data" and not being line at a time. ReadLines worked fine though.