Skip to content

Instantly share code, notes, and snippets.

@W-A-James
Last active February 9, 2024 18:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save W-A-James/bc03214bb505f6deda3cb2532d8320b4 to your computer and use it in GitHub Desktop.
Save W-A-James/bc03214bb505f6deda3cb2532d8320b4 to your computer and use it in GitHub Desktop.
mergeCSVs.ts
import * as fs from 'fs/promises';
/**
 * Minimal in-memory table backed by a comma-separated text format.
 *
 * Parsing is a plain split on commas and line breaks: quoted fields, embedded
 * commas and embedded newlines are NOT supported. Every row is stored as a
 * prototype-less record keyed by heading, so headings such as `constructor`
 * or `toString` behave like any other column name.
 */
class CSV {
  /** Path the table was loaded from; unset for tables built in memory. */
  filePath?: string;
  /** Column names, in output order. */
  headings?: string[];
  /** One record per data row, keyed by heading. */
  data?: Record<string, any>[];

  private constructor(path?: string, headings?: string[], data?: Record<string, any>[]) {
    this.filePath = path;
    this.headings = headings;
    this.data = data;
  }

  /**
   * Builds a table directly from headings and row records.
   *
   * @param headings Column names, in output order.
   * @param data Row records keyed by heading.
   */
  static fromData(headings: string[], data: Record<string, any>[]): CSV {
    return new CSV(undefined, headings, data);
  }

  /**
   * Parses CSV text. The first line supplies the headings; empty lines are
   * skipped. Both `\n` and `\r\n` line endings are accepted and a leading
   * byte-order mark is ignored.
   *
   * @param text Raw CSV contents.
   * @throws Error if a non-empty line has a different number of entries than
   *   the heading line.
   */
  static fromString(text: string): CSV {
    // Strip a BOM so it does not become part of the first heading, and split
    // on CRLF as well as LF so no stray '\r' ends up in the last column.
    const lines = text.replace(/^\uFEFF/, '').split(/\r?\n/);
    const headings = (lines[0] ?? '').split(',');
    const data: Record<string, any>[] = [];

    for (let i = 1; i < lines.length; i++) {
      const line = lines[i];
      if (line === undefined || line.length === 0) continue;

      const values = line.split(',');
      if (values.length !== headings.length) {
        throw new Error(
          `Line ${i + 1} has more/less entries than headings.\nEntries: ${
            values.length
          }\nHeadings: ${headings.join(',')}`
        );
      }

      // Prototype-less so that a heading can never collide with an inherited
      // Object.prototype member.
      const row: Record<string, any> = Object.create(null);
      headings.forEach((heading, j) => {
        row[heading] = values[j];
      });
      data.push(row);
    }

    return new CSV(undefined, headings, data);
  }

  /**
   * Reads and parses a CSV file.
   *
   * @param path File to read (decoded as UTF-8).
   * @throws Whatever `fs.readFile` rejects with (after logging it), or the
   *   parse error from {@link CSV.fromString}. A read failure is rethrown
   *   rather than swallowed so callers never receive a table without
   *   headings or data.
   */
  static async fromFile(path: string): Promise<CSV> {
    let text: string;
    try {
      text = await fs.readFile(path, 'utf8');
    } catch (e) {
      console.error(e);
      throw e;
    }
    const csv = CSV.fromString(text);
    csv.filePath = path;
    return csv;
  }

  /**
   * Combines this table with `other` into a new table; neither input is
   * modified.
   *
   * Headings shared by both tables come first (in this table's order),
   * followed by headings only in this table, then headings only in `other`.
   * Rows of this table precede rows of `other`. Any cell that is missing
   * (`null`/`undefined`) for a joined heading is filled with `'-'`.
   *
   * @param other Table to append.
   * @param sortFunc Optional comparator applied to the combined rows.
   * @returns A new table holding the merged headings and rows.
   */
  merge(
    other: CSV,
    sortFunc?: (a: Record<string, any>, b: Record<string, any>) => 1 | 0 | -1
  ): CSV {
    const ownHeadings = this.headings ?? [];
    const otherHeadings = other.headings ?? [];

    const inOther = new Set(otherHeadings);
    const commonHeaders = ownHeadings.filter((header) => inOther.has(header));
    const inCommon = new Set(commonHeaders);
    const joinedHeaders = [
      ...commonHeaders,
      ...ownHeadings.filter((header) => !inOther.has(header)),
      ...otherHeadings.filter((header) => !inCommon.has(header)),
    ];

    const newEntries = [...(this.data ?? []), ...(other.data ?? [])].map((entry) => {
      // Copy onto a prototype-less object: with a regular object literal a
      // heading like "constructor" would resolve to the inherited member and
      // the placeholder below would never be written.
      const newEntry: Record<string, any> = Object.assign(Object.create(null), entry);
      for (const header of joinedHeaders) {
        if (newEntry[header] == null) {
          newEntry[header] = '-';
        }
      }
      return newEntry;
    });

    if (sortFunc) {
      newEntries.sort(sortFunc);
    }
    return CSV.fromData(joinedHeaders, newEntries);
  }

  /**
   * Renders the table as CSV text: the heading line followed by one line per
   * row, each terminated by `\n`. Cells are stringified as-is; no quoting or
   * escaping is applied.
   */
  serialize(): string {
    const headings = this.headings ?? [];
    const lines = [headings.join(',')];
    for (const entry of this.data ?? []) {
      lines.push(headings.map((heading) => `${entry[heading]}`).join(','));
    }
    return lines.join('\n') + '\n';
  }

  /**
   * Writes the table to `path`, replacing any existing file.
   *
   * @param path Destination file.
   * @throws The error from `fs.open` (after logging) or from the write itself.
   */
  async write(path: string): Promise<void> {
    // Serialize before opening so a serialization failure cannot leave the
    // destination truncated.
    const contents = this.serialize();

    let w: fs.FileHandle;
    try {
      w = await fs.open(path, 'w');
    } catch (e) {
      console.error('Failed to open file');
      throw e;
    }

    try {
      await w.write(contents);
    } finally {
      // Always release the handle, even when the write fails.
      await w.close();
    }
  }
}
/**
 * Command-line entry point: `<script> <src1> <src2> [dest]`.
 *
 * Runs only when this file is the script Node was started with. Loads both
 * source CSVs concurrently, merges them and writes the result to `dest`
 * (default `merged.csv`). Exits with status 1 when a source path is missing.
 */
void (async () => {
  if (process.argv[1] !== __filename) return;

  const [, , fp1, fp2, newFile] = process.argv;
  if (!fp1) {
    console.error('Must provide src1');
    process.exit(1);
  }
  if (!fp2) {
    console.error('Must provide src2');
    process.exit(1);
  }

  const [csv1, csv2] = await Promise.all([CSV.fromFile(fp1), CSV.fromFile(fp2)]);
  const merged = csv1.merge(csv2);
  // `||` (not `??`) so an empty destination argument also falls back to the default.
  await merged.write(newFile || 'merged.csv');
})().catch((e: unknown) => {
  // Without a handler the failure is an unhandled rejection, which some Node
  // versions only warn about while still exiting with status 0.
  console.error(e);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment