Skip to content

Instantly share code, notes, and snippets.

@Iqlaas
Forked from Rup1/influencer.js
Created August 1, 2017 00:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Iqlaas/40be545aad7f072f9d14680fda93f8dd to your computer and use it in GitHub Desktop.
Save Iqlaas/40be545aad7f072f9d14680fda93f8dd to your computer and use it in GitHub Desktop.
Extracting Instagram info from screenshot images, scraping each profile, and putting the results into a spreadsheet
//Selenium WebDriver plus the Chrome-specific bindings; By/until/Key are shorthand aliases
//for the locator, wait-condition and keyboard-key namespaces.
var webdriver = require('selenium-webdriver'),
chrome = require('selenium-webdriver/chrome'),
By = webdriver.By,
until = webdriver.until,
Key = webdriver.Key;
//Chrome launch options shared by the single driver instance created below.
var o = new chrome.Options();
// o.addArguments('start-fullscreen');
o.addArguments('disable-infobars');
o.addArguments("disable-notifications");
//NOTE(review): presumably turns off Chrome's "save password" prompt - confirm against Chrome's preference list.
o.setUserPreferences( { credentials_enable_service: false } );
//parseFullName splits a display name into parts; findEmails extracts e-mail addresses from free text.
var parseFullName = require('parse-full-name').parseFullName;
var findEmails = require('find-emails-in-string');
//json2csv turns the collected rows into CSV text, fs reads the images and writes the CSV,
//okrabyte performs the image-to-text (OCR) step.
var json2csv = require('json2csv');
var fs = require('fs');
var okrabyte = require("okrabyte");
//One Chrome session, built at load time and used by every function in this script.
var driver = new webdriver.Builder().withCapabilities(webdriver.Capabilities.chrome()).setChromeOptions(o).build();
//helper find function
//Waits up to 5 seconds for the first element matching the CSS selector `el` to appear in the page.
//Returns the element promise produced by driver.wait itself, so callers can keep chaining
//element methods (e.g. find(sel).getText()). If nothing matches in time, the returned promise
//rejects with the message "Could not find <selector>".
//The wait result is returned directly (instead of being discarded and followed by a second
//findElement lookup) so the lookup can never run before the wait has finished.
var find = function(el){
  return driver.wait(until.elementLocated(By.css(el)), 5000, "Could not find " + el);
};
//read contents of directory with images
//Converting images to text is error prone, so I run this part of the script separately first and check the output in the console.
//I then literally copy and paste the result into the array below :) Ideally, this would be a function that returns an array ready to be used by the
//rest of the script.
//Read directory contents using 'readdir'
//OCR pass: lists every file in imgs/, runs each through okrabyte and prints the first
//space-separated word of the recognised text with underscores and digits removed.
//A directory read failure is rethrown; a single image that cannot be decoded is reported
//on stderr and skipped so the remaining images are still processed.
fs.readdir("imgs/", function (err, files) {
  if (err) {
    throw err;
  }
  //forEach gives each callback its own `file` binding and avoids leaking a global loop counter.
  files.forEach(function (file) {
    //Using okrabyte package read each image file (using readFileSync) and capture text
    okrabyte.decodeBuffer(fs.readFileSync("imgs/" + file), function (error, data) {
      //Without this guard a failed decode would crash on data.split below.
      if (error || typeof data !== "string") {
        console.error("Could not read text from imgs/" + file, error);
        return;
      }
      //split the returned string into an array of words (separated by spaces), and strip unwanted characters
      var splitWords = data.split(" ");
      var word = splitWords[0].replace(/_|[0-9]/g, "");
      console.log(word);
    });
  });
});
//Instagram handles to scrape; goToUser() appends each entry to "http://instagram.com/".
//Empty as checked in: per the notes above, the handles printed by the image-to-text step
//are pasted in here by hand before running the scrape.
var influencers = [
]
//Visits the Instagram profile of every handle in `influencers`, one at a time, collects
//first name, last name, follower count, first e-mail found in the bio and the profile URL,
//then writes all rows to file.csv.
//Returns a promise that resolves once the file has been written. It rejects if any page
//lookup, the CSV conversion or the file write fails (the scrape stops at the first failure).
//Every step is chained on the promise of the previous one, so ordering does not depend on
//selenium's promise manager or on sleep() calls, and each profile gets its own row object
//so values can never leak from one profile into the next.
function goToUser() {
  //csv fields to be created by json2csv
  var fields = ["first name", "last name", "followers", "email", "profile url"];
  //rows collected so far, in the same order as `influencers`
  var influencerCSV = [];

  //Scrapes a single profile and resolves with its CSV row.
  function scrapeProfile(handle) {
    var row = {
      "first name": null,
      "last name": null,
      "followers": null,
      "email": null,
      "profile url": null
    };
    //using webdriver, go to the profile page for this list member
    return driver.get("http://instagram.com/" + handle)
      .then(function () {
        //element holding the follower count (selector taken from the original author's notes)
        return find("._9o0bc li:nth-child(2) ._bkw5z").getText();
      })
      .then(function (txt) {
        row["followers"] = txt;
        //element holding the user's full name
        return find("._79dar").getText();
      })
      .then(function (txt) {
        //parseFullName copes with long names, middle initials, etc.
        var name = parseFullName(txt);
        row["first name"] = name.first;
        row["last name"] = name.last;
        //element holding the bio text
        return find("._bugdy").getText();
      })
      .then(function (txt) {
        var emails = findEmails(txt);
        row["email"] = emails[0] === undefined ? "No email listed" : emails[0];
        //read the URL back from the browser rather than reusing the one requested above
        return driver.getCurrentUrl();
      })
      .then(function (url) {
        row["profile url"] = url;
        return row;
      });
  }

  //Converts the collected rows to CSV and writes file.csv; resolves when the write finishes.
  function writeCsv() {
    var csv = json2csv({ data: influencerCSV, fields: fields });
    return new Promise(function (resolve, reject) {
      fs.writeFile('file.csv', csv, function (err) {
        if (err) {
          reject(err);
          return;
        }
        console.log('file saved');
        resolve();
      });
    });
  }

  //Build one sequential chain: the single browser session can only show one profile at a time.
  var scrapeAll = influencers.reduce(function (previous, handle) {
    return previous
      .then(function () {
        return scrapeProfile(handle);
      })
      .then(function (row) {
        influencerCSV.push(row);
      });
  }, Promise.resolve());

  return scrapeAll.then(writeCsv);
}
//Run the function that produces spreadsheet.
//Called once at load time; it reads whatever handles are in the `influencers` array at that moment.
goToUser();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment