Created July 20, 2017 07:05
-
-
Save benkahle/b0b60a85533d6f36538063894d37aaec to your computer and use it in GitHub Desktop.
The quickest and dirtiest contact list de-dupe
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Node built-ins, plus the third-party `csv` package used for parse/stringify.
const fs = require('fs')
const path = require('path')
const csv = require('csv')
// Input contact exports (resolved against the current working directory).
const B = path.resolve('./bloodworks.csv')
const O = path.resolve('./obliteride.csv')

// Output files: rows with no e-mail address, rows that have one, and the
// Obliteride rows that share an e-mail with a Bloodworks row.
const bRem = path.resolve('./bloodworks_no_email.csv')
const bOut = path.resolve('./bloodworks_cleaned.csv')
const oRem = path.resolve('./obliteride_no_email.csv')
const oOut = path.resolve('./obliteride_cleaned.csv')
const overlap = path.resolve('./overlap.csv')
// Header names, in output order, for every CSV file this script writes.
const columns = [
  "Title","First Name","Middle Name","Last Name","Suffix","Company","Department","Job Title",
  "Business Street","Business Street 2","Business Street 3","Business City","Business State",
  "Business Postal Code","Business Country/Region","Home Street","Home Street 2","Home Street 3",
  "Home City","Home State","Home Postal Code","Home Country/Region","Other Street","Other Street 2",
  "Other Street 3","Other City","Other State","Other Postal Code","Other Country/Region",
  "Assistant's Phone","Business Fax","Business Phone","Business Phone 2","Callback","Car Phone",
  "Company Main Phone","Home Fax","Home Phone","Home Phone 2","ISDN","Mobile Phone","Other Fax",
  "Other Phone","Pager","Primary Phone","Radio Phone","TTY/TDD Phone","Telex","Account","Anniversary",
  "Assistant's Name","Billing Information","Birthday","Business Address PO Box","Categories","Children",
  "Directory Server","E-mail Address","E-mail Type","E-mail Display Name","E-mail 2 Address","E-mail 2 Type",
  "E-mail 2 Display Name","E-mail 3 Address","E-mail 3 Type","E-mail 3 Display Name","Gender",
  "Government ID Number","Hobby","Home Address PO Box","Initials","Internet Free Busy","Keywords",
  "Language","Location","Manager's Name","Mileage","Notes","Office Location","Organizational ID Number",
  "Other Address PO Box","Priority","Private","Profession","Referred By","Sensitivity","Spouse","User 1",
  "User 2","User 3","User 4","Web Page"]

// The columns that are inspected when deciding whether a row has an e-mail
// address and whether two rows refer to the same contact.
const emailFields = ['E-mail Address', 'E-mail 2 Address', 'E-mail 3 Address']

// Options handed to csv.parse: `columns: true` keys each record by the
// header row of the input file.
const parseOpts = {
  auto_parse: true,
  columns: true,
  relax_column_count: true
}

// Options handed to csv.stringify: emit a header row using `columns`.
const writeOpts = {
  columns,
  header: true
}
/**
 * Formats a contact row as "First Last" for debug output.
 *
 * @param {Object} row - Parsed CSV record keyed by column header.
 * @returns {string} The row's 'First Name' and 'Last Name' joined by a space.
 */
function printName (row) {
  return `${row['First Name']} ${row['Last Name']}`
}
/**
 * Collects the non-empty e-mail values of a contact row.
 *
 * @param {Object} row - Parsed CSV record keyed by column header.
 * @returns {Array} Truthy values found under the `emailFields` columns.
 */
function rowEmails (row) {
  return emailFields.map(field => row[field]).filter(Boolean)
}

/**
 * Serialises and writes a list of CSV reports one after another, in order.
 * Stops at the first stringify error, logging it under that report's label.
 *
 * @param {Array} jobs - Entries of the form [errorLabel, filePath, rows].
 */
function writeReports (jobs) {
  if (!jobs.length) return
  const [[label, file, rows], ...rest] = jobs
  csv.stringify(rows, writeOpts, (err, out) => {
    if (err) return console.log(label, err)
    fs.writeFileSync(file, out)
    writeReports(rest)
  })
}

// Pipeline: read and parse both contact lists, split each into rows with and
// without an e-mail address, move Obliteride rows that share an e-mail with
// any Bloodworks row into the overlap list, then write the five output files.
fs.readFile(B, (err, bFile) => {
  if (err) return console.log('reading Bloodworks', err)
  csv.parse(bFile, parseOpts, (err, bDat) => {
    if (err) return console.log('parsing Bloodworks', err)
    fs.readFile(O, (err, oFile) => {
      if (err) return console.log('reading Oblit', err)
      csv.parse(oFile, parseOpts, (err, oDat) => {
        // Without this guard a parse failure leaves oDat undefined and the
        // filter calls below throw.
        if (err) return console.log('parsing Oblit', err)

        const bl = bDat.filter(row => rowEmails(row).length > 0)
        const bRemove = bDat.filter(row => rowEmails(row).length === 0)
        const ol = oDat.filter(row => rowEmails(row).length > 0)
        const oRemove = oDat.filter(row => rowEmails(row).length === 0)

        // Index every Bloodworks e-mail once so each Obliteride row is a
        // constant-time lookup instead of a scan over all Bloodworks rows.
        const bloodworksEmails = new Set()
        for (const row of bDat) {
          for (const email of rowEmails(row)) bloodworksEmails.add(email)
        }

        // Only rows that have an e-mail are considered here: rows without
        // one are already reported in the "no_email" file and, mirroring the
        // Bloodworks output, do not belong in the cleaned list.
        const dups = []
        const oCleaned = []
        for (const row of ol) {
          const isDup = rowEmails(row).some(email => bloodworksEmails.has(email))
          if (isDup) {
            dups.push(row)
          } else {
            oCleaned.push(row)
          }
        }

        writeReports([
          ['gen removed Bloodworks', bRem, bRemove],
          ['gen removed Oblit', oRem, oRemove],
          ['gen cleaned Bloodworks', bOut, bl],
          ['gen dups', overlap, dups],
          ['gen cleaned oblit', oOut, oCleaned]
        ])
      })
    })
  })
})
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.