jaseclamp/linkedin.js

## linkedin.js

/*
what this script does:
it pages through linkedin search results and copies each person's name, profile link, headline (position / company) and location into a javascript array
once it reaches the end of the results it will prompt to download all the collected rows as a tab delimited file (people.csv).
to use:
go to linkedin, run a search exactly how you want it
go to page 2 of those results
paste the below script into console and hit enter
paste this line into console to enable the script:
var capture = 1;
paste this line into console to disable the script:
var capture = 0;
the reason for this is once you paste the below script in
it will automatically keep paging through results
the only way to stop it is to paste in and execute var capture = 0;
you must keep your browser window active and in the forefront for this to work
if you can, ensure screen does not turn off, disable screen saver etc
their ember js only loads content it believes is being seen
why it was built this way:
this emulates human usage more.
it loads a page, scrolls up and down then saves data from the page.
I suppose it would have been possible to pull the data directly from the json that supplies content to their js app,
but as I mention below, that json is complex. I think scraping the rendered page is easier to adapt when linkedin changes things.
The drawback is that it's slower.
Warning - do not use this script to violate any TOS!!!
Only use to supplement the way in which you as a human user would normally page through and look at results.
*/

//expected number of search results; scrape() compares its running count
//against this to choose between paging on and downloading
//linkedin does not show more than 1000 results, so narrow the search if needed
var total = 991;

//on/off switch read by scrape(): 1 = capture, anything else = idle
//(declared with var so it can be re-declared from the console)
var capture = 1;

//one object per captured search result
var people = [];

//index of the next free slot in people
var i = 0;


/*
 * Wraps XMLHttpRequest.prototype.open so that scrape() is called whenever the
 * page opens a request whose URL contains 'blended'. The original open() is
 * then invoked with the unchanged arguments.
 */
(function() {
    var currentOpen = XMLHttpRequest.prototype.open;

    //running this script more than once would stack a second wrapper and call
    //scrape() twice per request; the marker lets a later run spot the hook
    if (currentOpen.__linkedinScrapeHook) return;

    function hookedOpen(method, url) {
        //open() accepts URL objects as well as strings, so coerce before searching
        if (String(url).includes('blended')) scrape();
        return currentOpen.apply(this, arguments);
    }
    hookedOpen.__linkedinScrapeHook = true;

    XMLHttpRequest.prototype.open = hookedOpen;
})();


/**
 * Captures the search results rendered on the current page, then either
 * clicks through to the next page or downloads everything collected so far.
 *
 * Called from the XMLHttpRequest.prototype.open hook, i.e. when a matching
 * request is opened, not when it completes. Does nothing unless the global
 * `capture` switch equals 1.
 *
 * Reads and writes the globals `people` (captured rows) and `i` (next index),
 * and reads `total` (expected number of results).
 */
function scrape() {

	//bail out when capturing has been switched off from the console
	if( capture!=1 ) return;

	//have to scroll page up and down to get ember to load unseen content
	//yes the content is sort of in the xhr object but it's pretty complex to understand the structure
	//of where all the data exists
	jQuery("html, body").animate({ scrollTop: 0 }, 1000);
	jQuery("html, body").animate({ scrollTop: jQuery(document).height() }, 1000);

	//the two animations above take 1 second each; wait 3 seconds before reading the page
	setTimeout(function(){

		jQuery('li.search-result').each(function(){

			var row = jQuery(this);

			people[i] = {
				name: row.find('span.actor-name').text(),
				link: row.find("a[href^='/in']").prop('href'),
				des: row.find('p.subline-level-1').text().replace(/[\n\r]+/g, ''),
				loc: row.find('p.subline-level-2').text().replace(/[\n\r]+/g, '')
			};

			console.log('iteration'+i);
			console.log(people[i]);

			//increment array counter
			i++;

		});

		//keep paging only while rows are still missing AND there is a next button to press;
		//clicking a missing or disabled button would leave the script waiting forever
		var next = jQuery(".artdeco-pagination__button--next");
		var canPage = next.length>0 && !next.is(':disabled');

		if( i<total && canPage ) {
			next.click();
			return;
		}

		//nothing matched the selectors, so there is nothing to put in a file
		if( people.length===0 ) {
			console.log('nothing captured, skipping download');
			return;
		}

		//otherwise we're going to do a tsv download of all the data
		var tsv = tabValues(people);
		var hiddenElement = document.createElement('a');
		//encodeURIComponent (not encodeURI) so that '#', '?' and '&' in the
		//scraped text are escaped and cannot cut the data: URI short
		hiddenElement.href = 'data:text/csv;charset=utf-8,' + encodeURIComponent(tsv);
		hiddenElement.target = '_blank';
		hiddenElement.download = 'people.csv';
		hiddenElement.click();

	}, 3000);

}


/**
 * Serialises an array of flat objects as tab-separated text.
 *
 * The first line holds the property names of the first element; every element
 * then contributes one line with its values for those properties, in the same
 * order. Each line, including the last, ends with "\n".
 *
 * @param {Object[]} array rows to serialise
 * @returns {string} the tab-separated text, or "" when there are no rows
 */
function tabValues(array) {

    //no first element means no column names to derive
    if (!array || array.length === 0) return "";

    var keys = Object.keys(array[0]);

    //a missing value becomes an empty cell instead of the text "undefined"/"null",
    //and embedded tabs or line breaks are collapsed so they cannot split a cell or row
    function cell(value) {
        if (value === undefined || value === null) return "";
        return String(value).replace(/[\t\r\n]+/g, " ");
    }

    var rows = array.map(function(obj){
        return keys.map(function(k){
            return cell(obj[k]);
        }).join("\t");
    });

    return keys.join("\t") + "\n" + rows.join("\n") + "\n";
}

	/*
	what this script does:
	it pages through linkedin search results and copies each person's name, profile link, headline (position / company) and location into a javascript array
	once it reaches the end of the results it will prompt to download all the collected rows as a tab delimited file (people.csv).
	to use:
	go to linkedin, run a search exactly how you want it
	go to page 2 of those results
	paste the below script into console and hit enter
	paste this line into console to enable the script:
	var capture = 1;
	paste this line into console to disable the script:
	var capture = 0;
	the reason for this is once you paste the below script in
	it will automatically keep paging through results
	the only way to stop it is to paste in and execute var capture = 0;
	you must keep your browser window active and in the forefront for this to work
	if you can, ensure screen does not turn off, disable screen saver etc
	their ember js only loads content it believes is being seen
	why it was built this way:
	this emulates human usage more.
	it loads a page, scrolls up and down then saves data from the page.
	I suppose it would have been possible to pull the data directly from the json that supplies content to their js app,
	but as I mention below, that json is complex. I think scraping the rendered page is easier to adapt when linkedin changes things.
	The drawback is that it's slower.
	Warning - do not use this script to violate any TOS!!!
	Only use to supplement the way in which you as a human user would normally page through and look at results.
	*/

	//expected number of search results; scrape() compares its running count
	//against this to choose between paging on and downloading
	//linkedin does not show more than 1000 results, so narrow the search if needed
	var total = 991;

	//on/off switch read by scrape(): 1 = capture, anything else = idle
	//(declared with var so it can be re-declared from the console)
	var capture = 1;

	//one object per captured search result
	var people = [];

	//index of the next free slot in people
	var i = 0;


	/*
	 * Wraps XMLHttpRequest.prototype.open so that scrape() is called whenever the
	 * page opens a request whose URL contains 'blended'. The original open() is
	 * then invoked with the unchanged arguments.
	 */
	(function() {
		var currentOpen = XMLHttpRequest.prototype.open;

		//running this script more than once would stack a second wrapper and call
		//scrape() twice per request; the marker lets a later run spot the hook
		if (currentOpen.__linkedinScrapeHook) return;

		function hookedOpen(method, url) {
			//open() accepts URL objects as well as strings, so coerce before searching
			if (String(url).includes('blended')) scrape();
			return currentOpen.apply(this, arguments);
		}
		hookedOpen.__linkedinScrapeHook = true;

		XMLHttpRequest.prototype.open = hookedOpen;
	})();


	/**
	 * Captures the search results rendered on the current page, then either
	 * clicks through to the next page or downloads everything collected so far.
	 *
	 * Called from the XMLHttpRequest.prototype.open hook, i.e. when a matching
	 * request is opened, not when it completes. Does nothing unless the global
	 * `capture` switch equals 1.
	 *
	 * Reads and writes the globals `people` (captured rows) and `i` (next index),
	 * and reads `total` (expected number of results).
	 */
	function scrape() {

		//bail out when capturing has been switched off from the console
		if( capture!=1 ) return;

		//have to scroll page up and down to get ember to load unseen content
		//yes the content is sort of in the xhr object but it's pretty complex to understand the structure
		//of where all the data exists
		jQuery("html, body").animate({ scrollTop: 0 }, 1000);
		jQuery("html, body").animate({ scrollTop: jQuery(document).height() }, 1000);

		//the two animations above take 1 second each; wait 3 seconds before reading the page
		setTimeout(function(){

			jQuery('li.search-result').each(function(){

				var row = jQuery(this);

				people[i] = {
					name: row.find('span.actor-name').text(),
					link: row.find("a[href^='/in']").prop('href'),
					des: row.find('p.subline-level-1').text().replace(/[\n\r]+/g, ''),
					loc: row.find('p.subline-level-2').text().replace(/[\n\r]+/g, '')
				};

				console.log('iteration'+i);
				console.log(people[i]);

				//increment array counter
				i++;

			});

			//keep paging only while rows are still missing AND there is a next button to press;
			//clicking a missing or disabled button would leave the script waiting forever
			var next = jQuery(".artdeco-pagination__button--next");
			var canPage = next.length>0 && !next.is(':disabled');

			if( i<total && canPage ) {
				next.click();
				return;
			}

			//nothing matched the selectors, so there is nothing to put in a file
			if( people.length===0 ) {
				console.log('nothing captured, skipping download');
				return;
			}

			//otherwise we're going to do a tsv download of all the data
			var tsv = tabValues(people);
			var hiddenElement = document.createElement('a');
			//encodeURIComponent (not encodeURI) so that '#', '?' and '&' in the
			//scraped text are escaped and cannot cut the data: URI short
			hiddenElement.href = 'data:text/csv;charset=utf-8,' + encodeURIComponent(tsv);
			hiddenElement.target = '_blank';
			hiddenElement.download = 'people.csv';
			hiddenElement.click();

		}, 3000);

	}


	/**
	 * Serialises an array of flat objects as tab-separated text.
	 *
	 * The first line holds the property names of the first element; every element
	 * then contributes one line with its values for those properties, in the same
	 * order. Each line, including the last, ends with "\n".
	 *
	 * @param {Object[]} array rows to serialise
	 * @returns {string} the tab-separated text, or "" when there are no rows
	 */
	function tabValues(array) {

		//no first element means no column names to derive
		if (!array || array.length === 0) return "";

		var keys = Object.keys(array[0]);

		//a missing value becomes an empty cell instead of the text "undefined"/"null",
		//and embedded tabs or line breaks are collapsed so they cannot split a cell or row
		function cell(value) {
			if (value === undefined || value === null) return "";
			return String(value).replace(/[\t\r\n]+/g, " ");
		}

		var rows = array.map(function(obj){
			return keys.map(function(k){
				return cell(obj[k]);
			}).join("\t");
		});

		return keys.join("\t") + "\n" + rows.join("\n") + "\n";
	}