// Usage: | |
// Copy and paste all of this into a debug console window of the "Who is Hiring?" comment thread | |
// then use as follows: | |
// | |
// query(term | [term, term, ...], term | [term, term, ...], ...) | |
// | |
// When arguments are in an array, that means "or"; when they are separate, that means "and".
// | |
// Term is of the format: | |
// ((-)text/RegExp) ( '-' means negation ) | |
// | |
// A first argument of '+' signifies an additional pass on the filtered data as opposed to | |
// resetting everything. | |
// | |
// Example: Let's look for jobs in california that involve rust or python and not crypto: | |
// | |
// > query('ca', '-crypto', ['rust', 'python']); | |
// {filtered: '98.57%', query: 'ca AND NOT crypto AND (rust OR python)'} | |
// | |
// Then you see, "oh right, I don't care about blockchain either": | |
// | |
// > query('+', '-blockchain'); | |
// {filtered: '98.57%', query: 'ca AND NOT crypto AND (rust OR python) AND NOT blockchain'} | |
// | |
// Another example: | |
// > query(['ca', 'sf', 'san jose', 'mountain view'])
// {filtered: '90.61%', query: '(ca OR sf OR san jose OR mountain view)'}
// | |
// COVID killed Silicon Valley. Quod Erat Demonstrandum! | |
// | |
// Changelog for 2022-08-02 | |
// | |
// ADDED | |
// | |
// * Negation via '-' | |
// | |
// * Multi-pass querying via first argument being '+' | |
// | |
// * Debugging query string added in the response | |
// | |
// CHANGED | |
// | |
// * "or" and "and" works the opposite of how it did previously. | |
// This form seems to be more useful. | |
// | |
// * Whole word matching is default | |
// | |
// * Terms such as "c++" are properly escaped | |
// | |
// UPDATED | |
// | |
// * Rewrote as an absurd implementation. | |
// I had a fun afternoon writing this. | |
// | |
/**
 * Filter the job postings of the current comment thread in place.
 *
 * Every argument is one group and all groups are AND-ed together. A group is
 * either a single term or an array of terms that are OR-ed. A term is either
 *   - a string, matched case-insensitively as a whole word; a leading '-'
 *     negates it, or
 *   - a RegExp, which is used exactly as given (never negated).
 *
 * If the first argument is the string '+', the remaining groups refine the
 * previous result instead of starting over.
 *
 * State is kept between calls on the function itself:
 *   query.fn     - array of the debug labels of every group applied so far
 *   query.hidden - number of postings hidden so far
 *
 * @param {...(string|RegExp|Array<string|RegExp>)} queryList groups to apply
 * @returns {{filtered: string, query: string}} percentage of postings hidden
 *   and a human-readable rendering of the accumulated query
 */
function query(...queryList) {
  // HN is done with very unsemantic classes.
  const jobList = [...document.querySelectorAll('.c5a,.cae,.c00,.c9c,.cdd,.c73,.c88')];

  // Traverses up the DOM trying to find an ancestor with a specific class.
  const upto = (node, klass) =>
    node.classList.contains(klass) ? node : upto(node.parentNode, klass);
  const display = (node, what) => {
    upto(node, 'athing').style.display = what;
  };
  // `node.show` remembers the visibility so that later passes can skip
  // postings that are already hidden.
  const hide = (node) => {
    display(node, 'none');
    node.show = false;
  };
  const show = (node) => {
    display(node, 'block');
    node.show = true;
  };

  const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  // Turn a raw term into { negated, pattern, label }.
  const parseTerm = (what) => {
    // Use a RegExp as is; its label is its source form, e.g. "/foo/i".
    if (what && typeof what.test === 'function') {
      return { negated: false, pattern: what, label: String(what) };
    }
    const raw = String(what);
    const negated = raw[0] === '-';
    const text = negated ? raw.slice(1) : raw;
    // A word boundary only makes sense next to a word character. Wrapping a
    // term such as "c++" in \b on both sides would require a word character
    // right after the final "+", so it would never match "C++ developer".
    const head = /^\w/.test(text) ? '\\b' : '';
    const tail = /\w$/.test(text) ? '\\b' : '';
    return {
      negated,
      pattern: new RegExp(head + escapeRegExp(text) + tail, 'i'),
      label: text,
    };
  };

  const isRefinement = queryList[0] === '+';
  const groups = isRefinement ? queryList.slice(1) : queryList;

  // This is our grand reset. It also runs when '+' is used before any
  // regular query, because there is no previous state to refine yet.
  if (!isRefinement || !Array.isArray(query.fn)) {
    jobList.forEach(show);
    query.fn = [];
    query.hidden = 0;
  }

  // The AND falls out of the design: every group only ever narrows the set
  // of postings that are still visible.
  for (const group of groups) {
    // Make it an array if it isn't one and parse every term.
    const orList = [group].flat().map(parseTerm);

    // A visible posting is hidden when every term of the group fails.
    // A plain term fails when it is absent; a negated term fails when present.
    const misses = jobList.filter((node) => node.show &&
      orList.every(({ negated, pattern }) =>
        negated === (node.innerHTML.search(pattern) !== -1)));
    misses.forEach(hide);
    query.hidden += misses.length;

    // Debug label: "a", "NOT a" or "(a OR NOT b)".
    const body = orList
      .map(({ negated, label }) => (negated ? 'NOT ' : '') + label)
      .join(' OR ');
    query.fn.push((orList.length === 1 ? body : '(' + body + ')').trim());
  }

  // Avoid reporting "NaN%" when the page holds no postings at all.
  const percent = jobList.length ? (100 * query.hidden) / jobList.length : 0;
  return {
    filtered: percent.toFixed(2) + '%',
    query: query.fn.join(' AND '),
  };
}
This script loads all pages via AJAX; you can run it before the one above so that you search across all pages instead of just the first one.
// Follows the "More" link at the bottom of the comment tree and appends the
// comments of each following page to the current page, until no "More" link
// is left.
;(function ajaxLoadNextPage() {
  const more = document.querySelector('.comment-tree > tbody > tr:last-child a');
  if (!more || more.innerHTML !== 'More') {
    return;
  }

  const httpRequest = new XMLHttpRequest();
  httpRequest.onreadystatechange = function () {
    if (httpRequest.readyState !== XMLHttpRequest.DONE) {
      return;
    }
    if (httpRequest.status !== 200) {
      alert('There was a problem with the request to ' + more.href);
      return;
    }

    // Parse the fetched page off-document to pull out its comment rows.
    const div = document.createElement('div');
    div.innerHTML = httpRequest.responseText;
    const nextBody = div.querySelector('.comment-tree > tbody');
    const currentBody = document.querySelector('.comment-tree > tbody');
    if (!nextBody || !currentBody) {
      // Unexpected markup: report it and keep the "More" link in place
      // rather than failing with a TypeError after the link is gone.
      alert('There was a problem with the request to ' + more.href);
      return;
    }

    more.remove();
    // Append without touching the rows that are already there.
    // `innerHTML +=` would re-serialise and re-parse the whole tree for every
    // page and replace the existing nodes (dropping any state set on them).
    currentBody.insertAdjacentHTML('beforeend', nextBody.innerHTML);
    ajaxLoadNextPage();
  };
  httpRequest.open('GET', more.href);
  httpRequest.send();
})();
Any plans to package this as an extension?
I was revisiting this this month ... I think what I really want these days is exclusion more than inclusion. For instance, I don't care about healthcare, remote e-learning or fintech (I find them to be hucksters trying to arbitrage broken markets with snake oil tech) but anyway ... a blacklist seems really useful ... I should do that instead.
This also works, replace the id with whatever you want.
curl 'https://hacker-news.firebaseio.com/v0/item/31947297.json?print=pretty' | jq '.kids' | grep -Po '[0-9]*' | xargs -n 1 -P 20 -I %% wget https://hacker-news.firebaseio.com/v0/item/%%.json\?print=pretty
Then you can grep that.
ok I updated it to implement all the things I've been musing about for 7 years and to hopefully make you laugh out loud while reading it.
It is extremely silly but hopefully not stupid and still legible
Thanks @kristopolous and @meiamsome, browser search functionality is definitely not enough to search hundreds of job positions!
Because I wanted a mix of both scripts (i.e. nested criteria AND regular expressions being first-class objects), and because it was fun to write, I ended up creating yet another version, which looks like this:
Details at https://gist.github.com/frosas/4cadd8392a3c4af82ef640cbedea3027