Skip to content

Instantly share code, notes, and snippets.

@benkahle
Created July 20, 2017 07:05
Show Gist options
  • Save benkahle/b0b60a85533d6f36538063894d37aaec to your computer and use it in GitHub Desktop.
Save benkahle/b0b60a85533d6f36538063894d37aaec to your computer and use it in GitHub Desktop.
The quickest and dirtiest contact list de-dupe
const fs = require('fs')
const path = require('path')
const csv = require('csv')
// Absolute paths of every file the script touches, resolved against the
// current working directory. Order of the names on the left matches the
// order of the file names on the right:
//   B / O         - input contact lists
//   bRem / oRem   - rows that had no e-mail address
//   bOut / oOut   - cleaned lists
//   overlap       - Obliteride rows that also appear in Bloodworks
const [B, O, bRem, bOut, oRem, oOut, overlap] = [
  './bloodworks.csv',
  './obliteride.csv',
  './bloodworks_no_email.csv',
  './bloodworks_cleaned.csv',
  './obliteride_no_email.csv',
  './obliteride_cleaned.csv',
  './overlap.csv'
].map(file => path.resolve(file))
// Column headers for every CSV this script writes; handed to the writer via
// writeOpts.columns, so this list fixes which fields are emitted and in what order.
// NOTE(review): the names look like an Outlook contact export layout - confirm
// against the actual input files before editing this list.
const columns = [
"Title","First Name","Middle Name","Last Name","Suffix","Company","Department","Job Title",
"Business Street","Business Street 2","Business Street 3","Business City","Business State",
"Business Postal Code","Business Country/Region","Home Street","Home Street 2","Home Street 3",
"Home City","Home State","Home Postal Code","Home Country/Region","Other Street","Other Street 2",
"Other Street 3","Other City","Other State","Other Postal Code","Other Country/Region",
"Assistant's Phone","Business Fax","Business Phone","Business Phone 2","Callback","Car Phone",
"Company Main Phone","Home Fax","Home Phone","Home Phone 2","ISDN","Mobile Phone","Other Fax",
"Other Phone","Pager","Primary Phone","Radio Phone","TTY/TDD Phone","Telex","Account","Anniversary",
"Assistant's Name","Billing Information","Birthday","Business Address PO Box","Categories","Children",
"Directory Server","E-mail Address","E-mail Type","E-mail Display Name","E-mail 2 Address","E-mail 2 Type",
"E-mail 2 Display Name","E-mail 3 Address","E-mail 3 Type","E-mail 3 Display Name","Gender",
"Government ID Number","Hobby","Home Address PO Box","Initials","Internet Free Busy","Keywords",
"Language","Location","Manager's Name","Mileage","Notes","Office Location","Organizational ID Number",
"Other Address PO Box","Priority","Private","Profession","Referred By","Sensitivity","Spouse","User 1",
"User 2","User 3","User 4","Web Page"]
// The columns consulted when deciding whether a row has any e-mail address
// and when matching rows between the two contact lists.
const emailFields = ['E-mail Address', 'E-mail 2 Address', 'E-mail 3 Address']
// Options passed to csv.parse for both input files.
const parseOpts = {
// NOTE(review): in csv-parse this requests conversion of cell text to native
// types - confirm the option name is still honoured by the installed version.
auto_parse: true,
// Rows are later read by header name (e.g. r['E-mail Address']), which relies
// on the parser producing one object per row keyed by the header line.
columns: true,
// NOTE(review): presumably tolerates rows whose field count differs from the
// header instead of failing the whole parse - confirm.
relax_column_count: true
}
// Options passed to csv.stringify for every output file: emit the fixed
// `columns` list and include a header line.
const writeOpts = {
columns,
header: true
}
/**
 * Builds a "First Last" display string for a contact row (debugging aid).
 *
 * @param {Object} row - parsed CSV row keyed by column header.
 * @returns {string} the 'First Name' and 'Last Name' values joined by a space.
 */
function printName (row) {
  const { 'First Name': first, 'Last Name': last } = row
  return `${first} ${last}`
}
/**
 * Tells whether a contact row carries at least one e-mail address.
 *
 * @param {Object} row - parsed CSV row keyed by column header.
 * @returns {boolean} true when any column listed in `emailFields` is truthy.
 */
function hasEmail (row) {
  return emailFields.some(eName => row[eName])
}

/**
 * Collects the non-empty e-mail values of a contact row.
 *
 * @param {Object} row - parsed CSV row keyed by column header.
 * @returns {Array} the truthy `emailFields` values, in `emailFields` order.
 */
function emailsOf (row) {
  return emailFields.map(eName => row[eName]).filter(e => e)
}

/**
 * Reads and parses one input CSV. On a read or parse failure the error is
 * logged and `done` is never called, so the script simply stops.
 *
 * @param {string} file - path of the CSV file to load.
 * @param {string} label - name used in the log message ('reading <label>').
 * @param {function(Array)} done - receives the parsed rows on success.
 */
function loadCsv (file, label, done) {
  fs.readFile(file, (err, raw) => {
    if (err) return console.log(`reading ${label}`, err)
    csv.parse(raw, parseOpts, (err, rows) => {
      if (err) return console.log(`parsing ${label}`, err)
      done(rows)
    })
  })
}

/**
 * Serialises and writes the given outputs strictly one after another.
 * A stringify failure is logged and the remaining outputs are skipped.
 *
 * @param {Array<{label: string, file: string, rows: Array}>} outputs
 */
function writeOutputs (outputs) {
  if (!outputs.length) return
  const [{ label, file, rows }, ...rest] = outputs
  csv.stringify(rows, writeOpts, (err, out) => {
    if (err) return console.log(label, err)
    fs.writeFileSync(file, out)
    writeOutputs(rest)
  })
}

// De-dupe the Obliteride list against the Bloodworks list:
//   1. rows with no e-mail at all are split off into the *_no_email files;
//   2. Obliteride rows sharing any e-mail with a Bloodworks row go to overlap;
//   3. what remains of each list is written to its *_cleaned file.
loadCsv(B, 'Bloodworks', bDat => {
  const bl = bDat.filter(hasEmail)
  const bRemove = bDat.filter(r => !hasEmail(r))

  loadCsv(O, 'Oblit', oDat => {
    const ol = oDat.filter(hasEmail)
    const oRemove = oDat.filter(r => !hasEmail(r))

    // Index every Bloodworks e-mail once so each Obliteride row is checked
    // with set lookups instead of a rescan of the whole Bloodworks list.
    const bEmails = new Set()
    for (const bRow of bDat) {
      for (const email of emailsOf(bRow)) bEmails.add(email)
    }

    // Partition only the rows that have an e-mail (`ol`): rows without one
    // cannot match anything and are already captured in `oRemove`, so they
    // must not reappear in the cleaned file.
    const dups = []
    const oCleaned = []
    for (const oRow of ol) {
      if (emailsOf(oRow).some(email => bEmails.has(email))) {
        dups.push(oRow)
      } else {
        oCleaned.push(oRow)
      }
    }

    writeOutputs([
      { label: 'gen removed Bloodworks', file: bRem, rows: bRemove },
      { label: 'gen removed Oblit', file: oRem, rows: oRemove },
      { label: 'gen cleaned Bloodworks', file: bOut, rows: bl },
      { label: 'gen dups', file: overlap, rows: dups },
      { label: 'gen cleaned oblit', file: oOut, rows: oCleaned }
    ])
  })
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment