Last active
February 9, 2024 18:28
-
-
Save W-A-James/bc03214bb505f6deda3cb2532d8320b4 to your computer and use it in GitHub Desktop.
mergeCSVs.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as fs from 'fs/promises'; | |
/**
 * A minimal in-memory CSV table: a list of column headings plus one record per row.
 *
 * Parsing and serialisation are deliberately naive: fields are split on, and joined
 * with, bare commas. Quoted fields and fields containing commas or newlines are not
 * supported.
 */
class CSV {
  /** Path the table was loaded from; undefined for tables built with `fromData`. */
  filePath?: string;
  /** Column names, in output order. */
  headings?: string[];
  /** One record per row, keyed by heading. */
  data?: Record<string, any>[];

  /** Instances are created through `fromData` or `fromFile` only. */
  private constructor(path?: string, headings?: string[], data?: Record<string, any>[]) {
    this.filePath = path;
    this.headings = headings;
    this.data = data;
  }

  /**
   * Builds a table directly from headings and rows, without touching the filesystem.
   *
   * @param headings Column names, in output order.
   * @param data One record per row, keyed by heading.
   */
  static fromData(headings: string[], data: Record<string, any>[]): CSV {
    return new CSV(undefined, headings, data);
  }

  /**
   * Reads and parses the CSV file at `path`.
   *
   * @param path Location of the file to load.
   * @throws If the file cannot be read, or if a row's entry count differs from the
   *   number of headings.
   */
  static async fromFile(path: string): Promise<CSV> {
    const csv = new CSV(path);
    await csv.init();
    return csv;
  }

  /**
   * Loads `filePath` and populates `headings` and `data`. Does nothing when headings
   * are already present.
   */
  private async init(): Promise<void> {
    if (this.headings) return;

    let contents: string;
    try {
      contents = await fs.readFile(this.filePath!, 'utf8');
    } catch (e) {
      // Log, then propagate: returning here would hand the caller a table with no
      // headings or data, which only fails later and further from the real cause.
      console.error(e);
      throw e;
    }

    // Accept both LF and CRLF line endings so that a stray '\r' never ends up inside
    // the last heading or the last value of a row.
    const lines = contents.split(/\r?\n/);
    const headings = lines[0].split(',');
    const rows: Record<string, any>[] = [];

    for (let i = 1; i < lines.length; i++) {
      // Blank lines (typically the one after the final newline) are skipped.
      if (lines[i].length === 0) continue;

      const values = lines[i].split(',');
      if (values.length !== headings.length) {
        throw new Error(
          `Line ${i + 1} has more/less entries than headings.\nEntries: ${
            values.length
          }\nHeadings: ${headings}`
        );
      }

      // Prototype-less object, so no heading name can collide with an inherited
      // Object.prototype member.
      const row: Record<string, any> = Object.create(null);
      for (let j = 0; j < headings.length; j++) {
        row[headings[j]] = values[j];
      }
      rows.push(row);
    }

    this.headings = headings;
    this.data = rows;
  }

  /**
   * Combines this table with `other` into a new table; neither input is modified.
   *
   * The resulting headings list the columns present in both tables first, followed by
   * the columns unique to this table and then the columns unique to `other`. Rows of
   * this table come before rows of `other`, and any column a row has no value for is
   * filled with '-'.
   *
   * @param other Table whose rows are appended after this table's rows.
   * @param sortFunc Optional comparator applied to the combined rows.
   * @returns A new table holding the merged headings and rows.
   */
  merge(
    other: CSV,
    sortFunc?: (a: Record<string, any>, b: Record<string, any>) => 1 | 0 | -1
  ): CSV {
    const otherHeadingSet = new Set(other.headings!);
    const commonHeaders: string[] = [];
    const otherHeaders: string[] = [];

    for (const header of this.headings!) {
      if (otherHeadingSet.has(header)) {
        commonHeaders.push(header);
      } else {
        otherHeaders.push(header);
      }
    }

    const commonHeaderSet = new Set(commonHeaders);
    for (const header of other.headings!) {
      if (!commonHeaderSet.has(header)) {
        otherHeaders.push(header);
      }
    }

    const joinedHeaders = commonHeaders.concat(otherHeaders);

    const newEntries: Record<string, any>[] = [];
    for (const entry of this.data!.concat(other.data!)) {
      // Shallow copy so that filling in placeholders never mutates the source rows.
      const newEntry: Record<string, any> = { ...entry };
      for (const header of joinedHeaders) {
        if (newEntry[header] == null) {
          newEntry[header] = '-';
        }
      }
      newEntries.push(newEntry);
    }

    if (sortFunc) {
      // newEntries is local to this call, so sorting in place is safe.
      newEntries.sort(sortFunc);
    }

    return CSV.fromData(joinedHeaders, newEntries);
  }

  /**
   * Writes the table to `path` as comma-separated text: one heading line followed by
   * one line per row, each terminated by '\n'. An existing file is overwritten.
   *
   * @param path Destination file.
   * @throws If the file cannot be opened or written.
   */
  async write(path: string): Promise<void> {
    let w: fs.FileHandle;
    try {
      w = await fs.open(path, 'w');
    } catch (e) {
      console.error('Failed to open file');
      throw e;
    }

    try {
      const headings = this.headings!;
      const lines: string[] = [headings.join(',')];
      // Exactly one line per row; indexing past the end of `data` would dereference
      // undefined and abort the write.
      for (const entry of this.data!) {
        lines.push(headings.map((heading) => `${entry[heading]}`).join(','));
      }
      await w.write(lines.join('\n') + '\n');
    } finally {
      // Release the handle even when serialising or writing fails.
      await w.close();
    }
  }
}
// Command-line entry point: merges two CSV files and writes the result.
// Arguments: <src1> <src2> [dest]; dest falls back to 'merged.csv' when omitted.
(async () => {
  // Only act when this module is the script node was asked to run.
  const invokedDirectly = process.argv[1] === __filename;
  if (!invokedDirectly) return;

  const [fp1, fp2, newFile] = process.argv.slice(2);

  // Both source paths are mandatory; report the first one that is missing.
  if (!fp1) {
    console.error('Must provide src1');
    process.exit(1);
  }
  if (!fp2) {
    console.error('Must provide src2');
    process.exit(1);
  }

  // Load both inputs concurrently, then merge the second into the first.
  const sources = await Promise.all([fp1, fp2].map((p) => CSV.fromFile(p)));
  const merged = sources[0].merge(sources[1]);

  await merged.write(newFile || 'merged.csv');
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment