Extract instagram info from screenshot, scrape each profile, put data into spreadsheet
Little background story:
One day before going to a brunch party, my girlfriend got an email from her employer with a bunch of screenshots of
Instagram influencer accounts. They asked that she put their names, websites, emails and other relevant info in a
spreadsheet. There were hundreds of accounts and this would have taken her at least a full day - ruining our party plans.
So I wrote this script to do it for her - I researched packages on the spot and was amazed at how easy it was to find node
packages that did what I needed. Then it was just a matter of stringing everything together. It's not pretty, but it took
about an hour and a half to write (targeting the right css selectors was a pain) and got the job done in about 15 minutes.
// Script setup: load dependencies, configure Chrome, and create the WebDriver session.
// `By`, `until` and `Key` are shorthands for the corresponding selenium-webdriver exports.
var webdriver = require('selenium-webdriver'),
chrome = require('selenium-webdriver/chrome'),
By = webdriver.By,
until = webdriver.until,
Key = webdriver.Key;
// Chrome options object for the browser session.
var o = new chrome.Options();
// o.addArguments('start-fullscreen');
// Sets the credentials_enable_service preference to false
// (presumably to suppress Chrome's "save password" prompt; confirm).
o.setUserPreferences( { credentials_enable_service: false } );
// parseFullName: splits a full name into parts (the script reads .first and .last).
var parseFullName = require('parse-full-name').parseFullName;
// findEmails: extracts email addresses from a string (the script reads the first result).
var findEmails = require('find-emails-in-string');
// json2csv: converts an array of objects into CSV text for the given fields.
var json2csv = require('json2csv');
// fs: used to read the screenshot directory and to write the CSV file.
var fs = require('fs');
// okrabyte: OCR package used to read text out of the screenshot images.
var okrabyte = require("okrabyte");
// NOTE(review): the statement below is incomplete as written. `withCapabilities(` has no
// argument and no closing parenthesis, and there is no `.build()` call. Presumably the
// Chrome options in `o` were meant to be passed here; restore before running.
var driver = new webdriver.Builder().withCapabilities(;
/**
 * Helper: waits for an element matching a CSS selector to be present, then
 * looks it up with the shared `driver`.
 *
 * @param {string} el - CSS selector of the element to locate.
 * @returns The result of `driver.findElement(By.css(el))`.
 */
var find = function(el){
  // Wait up to 5000 ms for the element to appear; the message identifies the
  // selector that could not be found if the wait times out.
  driver.wait(until.elementLocated(By.css(el)), 5000, "Could not find " + el);
  return driver.findElement(By.css(el));
};
// Step 1: read the screenshot directory and OCR each image to recover the account name.
// Because image-to-text conversion is error prone, this part is run separately first so the
// output can be checked in the console; the result is then copy-pasted into the `influencers`
// array. Ideally this would become a function that returns an array ready to be used by the
// rest of the script.
// Read the directory contents using 'readdir'.
fs.readdir("imgs/", function (err, files) {
// Abort if the directory cannot be read.
if (err) {
throw err;
// Iterate over each file in the directory.
// NOTE(review): no loop header is visible here, so `i` (used in `files[i]` below) is not
// defined in the code shown, and the `if` block above is not closed. Restore before running.
// Using the okrabyte package, read each image file (synchronously, via readFileSync) and capture its text.
// NOTE(review): the callback's `error` argument is never checked in the code shown.
okrabyte.decodeBuffer(fs.readFileSync("imgs/" + files[i]), function(error, data){
// Split the returned string into an array of words (separated by spaces).
var splitWords = data.split(" ");
// Keep only the first word and strip underscores and digits from it.
// NOTE(review): if account names can legitimately contain underscores or digits, this
// changes them; confirm that is intended. The use of `word` and the closing braces for
// the callbacks are not visible in this section.
var word = splitWords[0].replace(/_|[0-9]/g,"")
// Array of influencer account names extracted from the images (pasted in from the OCR step).
// NOTE(review): the array literal below is unterminated and its contents are not visible
// here. Fill in the names and close it with `];` before running.
var influencers = [
// Visits the profile page of each entry in `influencers`, collects the follower count, name,
// email and profile URL, and writes the collected rows to 'file.csv'.
// NOTE(review): several lines appear to be missing from this function (loop header, some
// element lookups, an `else`, the object/push wrapper and all closing braces). The notes
// below mark each gap; restore them before running.
function goToUser() {
// Variables that hold the data scraped from the current profile.
// NOTE(review): `middleName` and `website` are declared but never assigned in the code shown.
var followers;
var firstName;
var middleName;
var lastName;
var email;
var website;
var profileUrl;
// Array that accumulates one record per influencer.
var influencerCSV = [];
// CSV columns to be produced by json2csv; these match the keys of each record below.
var fields = ["first name", "last name", "followers", "email", "profile url"];
// Iterate through the array of influencers.
// NOTE(review): no loop header is visible, so `i` is not defined in the code shown.
// Using webdriver, go to the profile page for each list member.
// NOTE(review): the URL prefix is an empty string here; presumably the site's base URL
// belongs in it. Confirm.
driver.get("" + influencers[i]);
// Find the element that contains the follower count and store its text.
find("._9o0bc li:nth-child(2) ._bkw5z").getText().then(function(txt){
followers = txt;
// Find the element containing the Instagram bio; the first element is always the user's full
// name. Use the 'parseFullName' package to split the user's name.
// A package is used in order to deal with long names, middle initials, etc.
// NOTE(review): the lookup that should supply `txt` for the name is not visible here.
var name = parseFullName(txt);
firstName = name.first;
lastName = name.last;
// Find the element containing the Instagram bio and extract emails, if any.
// NOTE(review): the bio lookup that should supply `txt` is not visible here.
var emails = findEmails(txt);
// Fall back to a placeholder when no email was found.
// NOTE(review): no `else` is visible, so as shown the second assignment would overwrite
// the placeholder. Presumably it belongs in an else branch.
if(emails[0] === undefined){
email = "No email listed";
email = emails[0];
// Get the link to the Instagram user's profile URL.
// NOTE(review): `url` is not defined in the code shown; presumably it comes from a
// callback whose opening line is missing.
profileUrl = url;
// By now all the vars contain the desired data for this IG user; create a JSON object for the user with their info.
// These functions are executed inside driver.sleep() methods to make them run synchronously (a shortcut, not a clean design).
// NOTE(review): the driver.sleep() wrapper, the object-literal opener and the push into
// `influencerCSV` are not visible here.
"first name": firstName,
"last name": lastName,
"followers": followers,
"email": email,
"profile url": profileUrl
// Convert the array of records to CSV text.
var csv = json2csv({ data: influencerCSV, fields: fields });
// Write the CSV to 'file.csv' in the working directory; rethrow on failure, log on success.
fs.writeFile('file.csv', csv, function(err) {
if (err) throw err;
console.log('file saved');
// Run the function that produces the spreadsheet.
// NOTE(review): the actual call to goToUser() is not visible after this comment.
