Skip to content

Instantly share code, notes, and snippets.

@nathanchicken
Last active March 2, 2016 15:25
Show Gist options
  • Save nathanchicken/a0a3624c6859260e8699 to your computer and use it in GitHub Desktop.
Parses a CSV and outputs successes and failures, based on sanitisation. Currently this is just checking for valid emails. This requires some set up and knowledge of your CSV's columns.
/**
 * --------------------
 * Configuration
 * --------------------
 */
// Path of the source CSV to read.
var sourceFile = "Agents_1000s.csv";
// Where to save rows that pass validation.
var outputFileName = "agents_processed.csv";
// Where to save rows that fail validation (each gains a `reason` column).
var errorsFileName = "agents_errors.csv";
// Options handed to csv-stringify for both output files:
// comma-delimited, every column quoted even when empty, and NO header
// row (flip `header` to true to emit column labels taken from the
// object property names defined below).
var csvOptions = {
delimiter : ',',
header : false,
quoted : true
};
// MAPPING:
// Map each output property to its zero-based column index in the
// source CSV.
//
// EG: output.username = record[7] if username was the 8th column of the csv
var mapping = {
username : 7,
email : 2,
displayName : 0
};
// Default values merged into every output row before the mapped
// columns are copied over them. Could be extended to functions that
// return strings, e.g. a random password generator.
var defaultColumns = {
password : 'defaultPassword'
};
/**
* --------------------
* Modules
* --------------------
*/
var fs = require('fs');
var parse = require('csv').parse;
var stringify = require('csv').stringify;
var colors = require('colors');
var transform = require('stream-transform');
/**
* --------------------
* Code...
* ---------------------
*/
// Work around the two output files not being stringified independently:
// presumably csv-stringify mutates the options object it is given, so
// each call gets its own copy — TODO confirm against the csv module docs.
var csvOptions2 = Object.assign({}, csvOptions);
// NOTE(review): an unused `var input = fs.createReadStream(sourceFile)`
// used to live here; readLines() opens its own stream, so the duplicate
// (and leaked) file handle has been removed.
// Email validation pattern found online: an RFC-822-ish addr-spec
// matcher written with hex escapes for the disallowed character
// classes. Hoisted to module scope so the literal is built once.
var emailRegex = /^([^\x00-\x20\x22\x28\x29\x2c\x2e\x3a-\x3c\x3e\x40\x5b-\x5d\x7f-\xff]+|\x22([^\x0d\x22\x5c\x80-\xff]|\x5c[\x00-\x7f])*\x22)(\x2e([^\x00-\x20\x22\x28\x29\x2c\x2e\x3a-\x3c\x3e\x40\x5b-\x5d\x7f-\xff]+|\x22([^\x0d\x22\x5c\x80-\xff]|\x5c[\x00-\x7f])*\x22))*\x40([^\x00-\x20\x22\x28\x29\x2c\x2e\x3a-\x3c\x3e\x40\x5b-\x5d\x7f-\xff]+|\x5b([^\x0d\x5b-\x5d\x80-\xff]|\x5c[\x00-\x7f])*\x5d)(\x2e([^\x00-\x20\x22\x28\x29\x2c\x2e\x3a-\x3c\x3e\x40\x5b-\x5d\x7f-\xff]+|\x5b([^\x0d\x5b-\x5d\x80-\xff]|\x5c[\x00-\x7f])*\x5d))*$/;

// Returns true when `email` looks like a syntactically valid address.
function isEmail(email){
  return emailRegex.test( email );
}
// Accumulators filled in by transformer():
// rows that pass every check end up here...
var processed = [];
// ...and rows that fail end up here (each gains a `reason` property).
var errors = [];
// Running count of source lines parsed (diagnostic only).
var count = 0;
// Return the first element of `list` whose own properties strictly
// equal every own property of `props`; returns null when no element
// matches (including when `props` has no own properties, matching the
// behaviour of the hand-rolled loop this replaces).
function findWhere(list, props) {
  var wanted = Object.keys(props);
  if (wanted.length === 0) return null;

  for (var i = 0; i < list.length; i++) {
    var candidate = list[i];
    var allMatch = wanted.every(function(key) {
      return candidate.hasOwnProperty(key) && candidate[key] === props[key];
    });
    if (allMatch) return candidate;
  }
  // Nothing matched.
  return null;
}
// Transform one parsed CSV row (`record` is an array of column values)
// into an output object, pushing it onto `processed` on success or onto
// `errors` (with a pipe-delimited `reason` string) on failure.
// `callback` is optional and, when supplied, is invoked node-style with
// (null, obj) so the function can slot into a stream-transform pipeline;
// the existing readLines() caller does not pass one.
var transformer = function( record, callback ) {
  // Start from the defaults, then copy the mapped columns over them.
  var obj = Object.assign({}, defaultColumns);
  var fail = false;
  var reason = '';

  for (var prop in mapping) {
    if (!mapping.hasOwnProperty(prop)) continue;
    obj[prop] = record[ mapping[prop] ];
    // Reject rows whose mapped value duplicates an already-accepted row.
    var checkObj = {};
    checkObj[prop] = obj[prop];
    if ( findWhere( processed, checkObj ) ) {
      fail = true;
      reason += "| " + prop + " exists ";
    }
  }

  // EE needs usernames of at least 4 characters. Guard against the
  // username column being missing entirely (undefined has no .length).
  if ( !obj.username || obj.username.length < 4) {
    fail = true;
    reason += "| Username Too Short ";
  }
  // Hard-coded blacklist entry. (The original reported this as
  // "Username Too Short" — a copy-paste error in the reason string.)
  if ( obj.username === "naturalhighsafaris") {
    fail = true;
    reason += "| Username blacklisted ";
  }
  // Discard rows whose email column is not a valid address.
  if ( !isEmail( obj.email ) ) {
    fail = true;
    reason += "| Email not valid ";
  }
  // Run other sanitisations here...

  if ( fail ) {
    obj.reason = reason;
    errors.push( obj );
  } else {
    processed.push( obj );
  }

  // Support pipeline-style callers without requiring one.
  if (typeof callback === 'function') callback(null, obj);
};
// Read the source CSV one line at a time, run each parsed row through
// transformer(), and write the two output CSVs once every row has been
// handled.
//
// csv.parse() invokes its callback asynchronously, so the readline
// 'close' event can fire while parses are still outstanding; saving
// immediately on 'close' (as the original did) can drop trailing rows.
// We therefore count in-flight parses and only save when the reader is
// closed AND no parse callbacks remain.
var readLines = function( ) {
  var lineReader = require('readline').createInterface({
    input: fs.createReadStream( sourceFile )
  });
  var pendingParses = 0;
  var readerClosed = false;

  // Write both output files. Called exactly once, when all parsing is done.
  function saveResults() {
    stringify(processed, csvOptions, function(err, output){
      if (err) return console.error( ("stringify failed: " + err).red );
      fs.writeFile( outputFileName, output, function(writeErr) {
        if (writeErr) return console.error( ("write failed: " + writeErr).red );
        console.log( ("saved " + processed.length + " records in ").green + outputFileName.green );
      });
    });
    // Save the failures.
    stringify(errors, csvOptions2, function(err, errOutput){
      if (err) return console.error( ("stringify failed: " + err).red );
      fs.writeFile( errorsFileName, errOutput, function(writeErr) {
        if (writeErr) return console.error( ("write failed: " + writeErr).red );
        console.log( ("saved " + errors.length + " records in ").red + errorsFileName.red );
      });
    });
  }

  function maybeFinish() {
    if (readerClosed && pendingParses === 0) saveResults();
  }

  lineReader.on('line', function (line) {
    pendingParses++;
    parse( line, {}, function( err, output ) {
      if (!err && output && output[0]) {
        transformer( output[0] );
        count++;
      }
      pendingParses--;
      maybeFinish();
    });
  });

  lineReader.on('close', function() {
    readerClosed = true;
    maybeFinish();
  });
};
/* run */
// do something here if you wanted to, say, open a member comparison file and run below as a callback.
readLines();
{
"name": "csv",
"version": "1.0.0",
"description": "",
"main": "csv.js",
"dependencies": {
"stream-transform": "^0.1.1",
"csv": "^0.4.6",
"colors": "^1.1.2"
},
"devDependencies": {},
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC"
}
@nathanchicken
Copy link
Author

Previous version did not loop through all the data properly. This might be an issue with piping (which I need to learn more about), but it might also be some sort of issue with the stream just being "data" and not being line at a time. ReadLines worked fine though.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment