Skip to content

Instantly share code, notes, and snippets.

@eenblam
Last active October 15, 2019 11:00
Show Gist options
  • Save eenblam/8897a17fe5a917af53ccf38afd62302e to your computer and use it in GitHub Desktop.
Save eenblam/8897a17fe5a917af53ccf38afd62302e to your computer and use it in GitHub Desktop.
Naive JS implementation of tidyr's gather function. Intended for use with JSON-styled tabular data... like you'd get from d3.dsv
'use strict';
let R = require('ramda');
// lengthenRow :: String -> String -> Object -> [Object]
// Turns one object into a list of two-property objects, one per own key:
// the key is stored under keyLabel and its value under valueLabel.
let lengthenRow = R.curry(function (keyLabel, valueLabel, row) {
  const longRows = [];
  for (const column of Object.keys(row)) {
    longRows.push({
      [keyLabel]: column,
      [valueLabel]: row[column]
    });
  }
  return longRows;
});
// gatherRow :: String -> String -> [String] -> Object -> [Object]
// Converts the wide JSON representation of one CSV row into an array of
// long-format rows: one output row per entry of `columns` found on `row`,
// each carrying every property of `row` that is not listed in `columns`.
let gatherRow = R.curry(function (keyLabel, valueLabel, columns, row) {
  const untouchedNames = R.difference(R.keys(row), columns);
  const kept = R.pick(untouchedNames, row);
  const wide = R.pick(columns, row);
  // The kept properties are merged first, so the key/value pair wins on a name clash.
  const withKept = R.merge(kept);
  return R.map(withKept, lengthenRow(keyLabel, valueLabel, wide));
});
module.exports.gatherRow = gatherRow;
'use strict';
let R = require('ramda');
// lengthenRow :: String -> String -> Object -> [Object]
let lengthenRow = R.curry(function (keyLabel, valueLabel, row) {
  // Emits one {[keyLabel]: name, [valueLabel]: value} object per own key of row.
  // Note: lengthenRow is just the trivial gatherRow when no columns are kept!
  const toLongRow = name => ({
    [keyLabel]: name,
    [valueLabel]: row[name]
  });
  return R.map(toLongRow, R.keys(row));
});
// gatherRow :: String -> String -> [String] -> Object -> [Object]
// Converts the wide JSON representation of one CSV row into long format.
let gatherRow = R.curry(function (keyLabel, valueLabel, columns, row) {
  // Properties not named in `columns` are repeated on every output row.
  const keptNames = R.difference(R.keys(row), columns);
  const kept = R.pick(keptNames, row);
  // Properties named in `columns` become one key/value row each.
  const longRows = lengthenRow(keyLabel, valueLabel, R.pick(columns, row));
  return R.map(longRow => R.merge(kept, longRow), longRows);
});
module.exports.gatherRow = gatherRow;
'use strict';
/**
 * Build a new object holding only the listed properties of `record`.
 *
 * @param {Object} record - Source object; it is read but not modified.
 * @param {string[]} fields - Property names to copy, in output order.
 * @returns {Object} A fresh object with one entry per name in `fields`;
 *   a name that `record` lacks is copied as `undefined`.
 */
function withFields(record, fields) {
  const picked = {};
  for (const name of fields) {
    picked[name] = record[name];
  }
  return picked;
}
/**
 * Partition a record's properties by whether their key is listed in `fields`.
 *
 * @param {Object} record - Source record; it is read but not modified.
 * @param {...string} fields - Keys to separate out of the record.
 * @returns {Object[]} A two-element array `[others, given]`: `others` holds
 *   every own enumerable key of `record` that is not listed in `fields`, and
 *   `given` holds one entry per listed field.
 */
function splitRecord(record, ...fields) {
  const withGivenFields = withFields(record, fields);
  // Build the lookup once so each key costs a constant-time membership test
  // instead of a scan of `fields` (which is quadratic for wide records).
  const requested = new Set(fields);
  const otherFields = Object.keys(record)
    .filter(key => !requested.has(key));
  const withOtherFields = withFields(record, otherFields);
  return [withOtherFields, withGivenFields];
}
/**
 * Convert a wide JSON representation of CSV data into long format.
 *
 * Every input record yields one output record per gathered column. Each
 * output record stores the column name under `keyLabel`, the column value
 * under `valueLabel`, and then every property of the input record that was
 * not gathered.
 *
 * @param {Object[]} data - Wide records; they are read but not modified.
 * @param {string} keyLabel - Property name that receives the column name.
 * @param {string} valueLabel - Property name that receives the column value.
 * @param {...string} columns - Names of the columns to gather.
 * @returns {Object[]} Flat array of long records, grouped by input record
 *   and ordered by `columns` within each group.
 */
function gather(data, keyLabel, valueLabel, ...columns) {
  // Append into one result array: flattening with reduce + concat re-copies
  // the accumulator for every record, which is quadratic in the output size.
  const longRecords = [];
  data.forEach(record => {
    const [keptFields, wideFields] = splitRecord(record, ...columns);
    Object.keys(wideFields).forEach(column => {
      const longField = {
        [keyLabel]: column,
        [valueLabel]: wideFields[column]
      };
      // Kept fields are assigned last, so they win if a kept column shares
      // its name with keyLabel or valueLabel.
      longRecords.push(Object.assign({}, longField, keptFields));
    });
  });
  return longRecords;
}
module.exports.gather = gather;
@eenblam
Copy link
Author

eenblam commented Oct 27, 2016

gather-vanilla

Usage:

const data = [
  {Factor1: 'x1', A: 1, B: 74, C: 0.3},
  {Factor1: 'x2', A: 2, B: 89, C: 0.12},
  {Factor1: 'x3', A: 3, B: 30, C: 0.5}
  ];

const fields = ['A', 'B', 'C'];
const out = gather(data, 'Factor2', 'Value', ...fields);
out.forEach(item => { console.log(item); });

...which should produce...

Object {Factor2: "A", Value: 1, Factor1: "x1"}
Object {Factor2: "B", Value: 74, Factor1: "x1"}
Object {Factor2: "C", Value: 0.3, Factor1: "x1"}
Object {Factor2: "A", Value: 2, Factor1: "x2"}
Object {Factor2: "B", Value: 89, Factor1: "x2"}
Object {Factor2: "C", Value: 0.12, Factor1: "x2"}
Object {Factor2: "A", Value: 3, Factor1: "x3"}
Object {Factor2: "B", Value: 30, Factor1: "x3"}
Object {Factor2: "C", Value: 0.5, Factor1: "x3"}

Note the rest parameter (...columns) in the signature of gather, and the matching spread operator (...) at the call site. Together they mean that the two lines below are equivalent:

gather(data, 'Factor2', 'Value', ...fields)
gather(data, 'Factor2', 'Value', 'A', 'B', 'C')

This way, gather can be explicitly parameterized for a small number of fields, but we can also dump a larger number of values in. Suppose we have a very wide data set, with 28 columns, ['Factor1', 'Factor2', 'A', 'B', ..., 'Z']. If we wish to "lengthen" all but the first two columns, 'Factor1' and 'Factor2', we could simply relabel them all under Factor3 like so:

let columns = Object.keys(d3.values(reallyWideData)[0]);
let theAlphabet = columns.slice(2);
let longData = gather(reallyWideData, 'Factor3', 'Value', ...theAlphabet);

gather-ramda

let k = 'Factor2';
let v = 'Value';
let columns = ['A','B', 'C'];
let gatherer = gatherRow(k, v, columns);

Now, we could just map gatherer over the rows and flatten the result (the equivalent of reducing by concat)...

let gather = R.compose(R.flatten, R.map(gatherer));
let gathered = gather(data);

...or, we can apply gatherRow to an arbitrary stream of rows with most.concatMap!

let most = require('most');
let gatheredStream = streamOfWideRows.concatMap(gatherer);

@dalcib
Copy link

dalcib commented Oct 15, 2019

In the gather-vanilla.js the variables are changed in lines 25 and 27.

return data.map(record => {
   let [keptFields, wideFields] = splitRecord(record, ...columns);
   let longFields = lengthen(keptFields); //here
   let nestedArrays = longFields.map(
     longField => Object.assign({}, longField, wideFields) // and here
   );
   return nestedArrays;
 }).reduce((acc, arr) => acc.concat(arr), []);

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment