Skip to content

Instantly share code, notes, and snippets.

@spattanaik75
Created July 19, 2019 12:34
Show Gist options
  • Save spattanaik75/8637fabf61ef06b7f0c0de1127fd37c4 to your computer and use it in GitHub Desktop.
Save spattanaik75/8637fabf61ef06b7f0c0de1127fd37c4 to your computer and use it in GitHub Desktop.
// we set up the data fetch and hand the data
// to our main function
const fetch = require('node-fetch');
// Builds the JSON endpoint for a Reddit URL: a single trailing slash (if any)
// is dropped and the `.json` suffix is appended.
const getRedditJSONUrl = url => {
  const base = url.endsWith('/') ? url.slice(0, -1) : url;
  return `${base}.json`;
};
// Fetches `url` and resolves with the response body parsed as JSON.
// `fetch` only rejects on network-level failures, so an HTTP error status is
// turned into a rejection here rather than being parsed as if it succeeded.
// Rejects with an Error when the response is not ok or the request fails.
const fetchData = async url => {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(
      `Request to ${url} failed with status ${response.status} ${response.statusText}`
    );
  }
  return response.json();
};
// Pulls the list of post entries (`data.children`) out of a fetched listing page.
const extractPosts = redditPage => {
  const { children } = redditPage.data;
  return children;
};
// Joins a post's title and its self text into one string, separated by a newline.
const extractPostTextAndTitle = post => {
  const { title, selftext } = post.data;
  return title + '\n' + selftext;
};
const _wordCount = require('@iarna/word-count');
// Counts the words in `text` by delegating to the imported word-count helper.
const countWords = text => {
  return _wordCount(text);
};
// Comparator for Array.prototype.sort that orders numbers ascending
// (negative when `a` sorts first, positive when `b` sorts first).
const numberValueSorter = (a, b) => {
  return a - b;
};
// Computes the median of a list of numbers.
// Returns `undefined` for an empty list; the input list is never modified.
const calculateMedian = list => {
  // sort a copy so the caller's array keeps its original order
  const sorted = Array.from(list).sort(numberValueSorter);
  // an empty list has no median
  if (sorted.length === 0) return undefined;
  const middle = Math.floor(sorted.length / 2);
  if (sorted.length % 2 === 0) {
    // even-sized set: take the midpoint of the two middle values.
    // Values must be read from `sorted`, not from the unsorted input.
    return (sorted[middle - 1] + sorted[middle]) / 2;
  }
  // odd-sized set: pick the middle value
  return sorted[middle];
};
// Reads the comment count (`data.num_comments`) recorded on a post.
const countComments = post => {
  const { num_comments: commentCount } = post.data;
  return commentCount;
};
// Tells whether a post's `post_hint` is exactly the string 'image'.
// Strict equality is used so that non-string values are never coerced
// into a match (e.g. `['image'] == 'image'` is true under loose equality).
const hasImageAttached = post => post.data.post_hint === 'image';
// Fraction of truthy entries in `array`; `undefined` when the array is empty.
const calculateRatio = array => {
  const total = array.length;
  if (total === 0) return undefined;
  let truthyCount = 0;
  for (const value of array) {
    if (value) truthyCount += 1;
  }
  return truthyCount / total;
};
// Builds an async function that transforms every element of an array by
// running it through `mappers` in order. Elements are handled one after
// another, and each mapper's (possibly async) result is awaited before the
// next mapper is called.
const map = (...mappers) => {
  return async array => {
    const mapped = [];
    for (const item of array) {
      // thread the element through the mapper chain
      let current = item;
      for (const mapper of mappers) {
        current = await mapper(current);
      }
      mapped.push(current);
    }
    return mapped;
  };
};
// Composes `steps` into a single async function: the input is handed to the
// first step, and every awaited result becomes the argument of the next one.
// The value produced by the last step is the overall result.
const pipeline = (...steps) =>
  async input => {
    let current = input;
    for (let i = 0; i < steps.length; i += 1) {
      const step = steps[i];
      current = await step(current);
    }
    return current;
  };
// Builds an async function that feeds one value to every given pipeline and
// resolves with their results, in the same order as the pipelines.
const fork = (...pipelines) => {
  return async value => {
    const pending = pipelines.map(branch => branch(value));
    return Promise.all(pending);
  };
};
// Builds a function that runs `pipeline` once per entry of `values` and
// resolves with the results in the same order as `values`.
// Only the value itself is passed on: handing `pipeline` straight to
// Array.prototype.map would also feed it the index and the whole array.
const distribute = pipeline => values =>
  Promise.all(values.map(value => pipeline(value)));
// Median word count over a list of posts: each post is reduced to its
// title-plus-text string, that string is word-counted, and the median of
// the counts is taken.
const getMedianWordCount = pipeline(
  map(extractPostTextAndTitle, countWords),
  calculateMedian
);
// Median number of comments over a list of posts.
const getMedianCommentCount = pipeline(map(countComments), calculateMedian);
// Share of posts in a list that have an image attached.
const getImagePresentRatio = pipeline(map(hasImageAttached), calculateRatio);
// Convenience helper: gives names to the positional results of the metric
// pipelines (word-count median, comment-count median, image ratio — in that order).
const joinResults = results => {
  const [medianWordCount, medianCommentCount, imagePresentRatio] = results;
  return { medianWordCount, medianCommentCount, imagePresentRatio };
};
// The whole process for one subreddit URL: derive the JSON URL, fetch it,
// extract the posts, compute the three metrics from the same post list via
// fork, and finally label the results.
const getSubredditMetrics = pipeline(
  getRedditJSONUrl,
  fetchData,
  extractPosts,
  fork(getMedianWordCount, getMedianCommentCount, getImagePresentRatio),
  joinResults
);
// Subreddit pages to report on.
const URLs = [
  'https://www.reddit.com/r/dataisbeautiful/',
  'https://www.reddit.com/r/proceduralgeneration/',
];
// Runs the metrics pipeline once per URL.
const getAllReports = distribute(getSubredditMetrics);
getAllReports(URLs)
  .then(results => {
    // results arrive in the same order as URLs, so they can be paired by index
    const reports = results.map((report, idx) => ({
      url: URLs[idx],
      report,
    }));
    console.log(reports);
  })
  .catch(error => {
    // report fetch/parse failures instead of leaving the rejection unhandled
    console.error('Failed to collect subreddit metrics:', error);
    process.exitCode = 1;
  });
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment