Skip to content

Instantly share code, notes, and snippets.

@salomonjohns
Forked from cosmocatalano/instagram_scrape.php
Last active January 20, 2017 12:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save salomonjohns/28ab403e1bf7f7787dd20ffc87ec1c0a to your computer and use it in GitHub Desktop.
Save salomonjohns/28ab403e1bf7f7787dd20ffc87ec1c0a to your computer and use it in GitHub Desktop.
Quick-and-dirty Instagram web scrape, modified to collect photo codes, like and comment counts, as well as a few other metrics. SQL code in comments.
<?php
// There are probably better ways to do this
// http://www.salomonjohns.com will probably maintain updates to this as I change it and incorporate chart.js or a similar feature set
// I am not the creator of this script, I just made changes to it and tied it to MySQL
// Look into http://www.jsoneditoronline.org/ to more easily find the fields you are looking for
/* SQL Setup
-- Adminer 3.3.3 MySQL dump
SET NAMES utf8;
SET foreign_key_checks = 0;
SET time_zone = 'SYSTEM';
SET sql_mode = 'NO_AUTO_VALUE_ON_ZERO';
CREATE TABLE `followers_history` (
`followers` mediumint(9) NOT NULL,
`dt` date NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `ig_content` (
`postcode` varchar(32) NOT NULL,
`likes` mediumint(9) NOT NULL,
`comments` mediumint(9) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- 2017-01-20 15:28:11
*/
// Database connection settings (placeholder credentials -- replace before running).
$servername = "localhost";
$username = "XXXXXXXXXXXXX";
$password = "XXXXXXXXXXXXX";
$dbname = "ig";
// Open the MySQL connection that the rest of the script uses via $conn.
$conn = new mysqli($servername, $username, $password, $dbname);
// Stop right away, printing the driver's message, when the connection attempt failed.
$connect_failure = $conn->connect_error;
if ($connect_failure) {
    exit("Connection failed: " . $connect_failure);
}
//returns a big old hunk of JSON from a non-private IG account page.
/**
 * Downloads the public profile page for $username and decodes the JSON blob
 * that the page assigns to `window._sharedData`.
 *
 * @param string $username Instagram account name; it is URL-encoded before
 *                         being appended to the profile URL.
 * @return array|null The decoded JSON as an associative array, or null when
 *                    the page could not be fetched, the `window._sharedData`
 *                    marker is absent, or the payload does not decode to an array.
 */
function scrape_insta($username) {
    $insta_source = file_get_contents('http://instagram.com/' . rawurlencode($username));
    if ($insta_source === false) {
        // Fetch failed (file_get_contents has already raised its own warning).
        return null;
    }

    $shards = explode('window._sharedData = ', $insta_source);
    if (!isset($shards[1])) {
        // Marker not found: the page layout changed or this is not a profile page.
        return null;
    }

    // The JSON literal runs up to the end of its enclosing <script> element.
    $insta_json = explode(';</script>', $shards[1]);
    $insta_array = json_decode($insta_json[0], TRUE);

    // Callers index straight into the result, so never hand back a scalar.
    return is_array($insta_array) ? $insta_array : null;
}
//Supply a username
$my_account = 'xxxxxxxx';
//Do the deed
$results_array = scrape_insta($my_account);
// Begin Get New Photos
// Collects metrics on the photos recently posted (limited to 10).
// The profile may expose fewer than 10 posts, or nothing at all if the scrape
// did not produce the expected structure, so walk the nodes that are actually
// present instead of indexing 0..9 blindly.
$media_nodes = isset($results_array['entry_data']['ProfilePage'][0]['user']['media']['nodes'])
    ? $results_array['entry_data']['ProfilePage'][0]['user']['media']['nodes']
    : [];
if (!is_array($media_nodes)) {
    $media_nodes = [];
}

// Prepared statements keep scraped values out of the SQL text. The existence
// check uses '=' rather than LIKE: post codes can contain '_', which LIKE
// treats as a single-character wildcard and would match unrelated rows.
$exists_stmt = $conn->prepare('SELECT 1 FROM `ig_content` WHERE postcode = ? LIMIT 1');
$insert_stmt = $conn->prepare('INSERT INTO `ig_content` (postcode,likes,comments) VALUES (?,?,?)');
if (!$exists_stmt || !$insert_stmt) {
    die('Invalid query: ' . $conn->error);
}

foreach (array_slice($media_nodes, 0, 10) as $latest_array) {
    $code = isset($latest_array['code']) ? (string) $latest_array['code'] : '';
    if ($code === '') {
        // Without a post code there is nothing to key the row on.
        continue;
    }
    $likes = isset($latest_array['likes']['count']) ? (int) $latest_array['likes']['count'] : 0;
    $comments = isset($latest_array['comments']['count']) ? (int) $latest_array['comments']['count'] : 0;

    // The code comes from a remote page; escape it before echoing into HTML.
    echo 'ID: ' . htmlspecialchars($code, ENT_QUOTES, 'UTF-8') . "<br />\n";
    echo 'Likes: ' . $likes . "<br />\n";
    echo 'Comments: ' . $comments . "<br />\n";

    // Check if photo exists in DB
    $exists_stmt->bind_param('s', $code);
    if (!$exists_stmt->execute()) {
        die('Invalid query: ' . $exists_stmt->error);
    }
    $exists_stmt->store_result();
    $num_rows = $exists_stmt->num_rows;
    $exists_stmt->free_result();

    if ($num_rows > 0) {
        echo "Skipping, already present...<br />\n";
    } else {
        // Insert Data Into Database
        $insert_stmt->bind_param('sii', $code, $likes, $comments);
        if (!$insert_stmt->execute()) {
            // Report the mysqli error; mysql_error() belongs to the removed ext/mysql.
            die('Invalid query: ' . $insert_stmt->error);
        }
    }
}

$exists_stmt->close();
$insert_stmt->close();
// End Get New Photos
// Get the follower count
$followers = isset($results_array['entry_data']['ProfilePage'][0]['user']['followed_by']['count'])
    ? (int) $results_array['entry_data']['ProfilePage'][0]['user']['followed_by']['count']
    : null;
echo 'Followers: ' . $followers . "<br />\n";

if ($followers === null) {
    // The scraped data did not contain a follower count; do not store a bogus row.
    echo "Skipping, follower count unavailable...<br />\n";
} else {
    // Make sure the value doesnt exist
    // Compare against CURDATE() on the server, the same expression the INSERT
    // stores. A PHP-side date('Y-m-j') string has no leading zero on the day,
    // so it never matched rows dated the 1st-9th and let duplicates through.
    $exists_sql = $conn->query('SELECT 1 FROM `followers_history` WHERE dt = CURDATE() LIMIT 1');
    if (!$exists_sql) {
        die('Invalid query: ' . $conn->error);
    }
    $num_rows = $exists_sql->num_rows;
    $exists_sql->free();

    if ($num_rows > 0) {
        echo "Skipping, already present...<br />\n";
    } else {
        // SQL to insert values (follower count bound as an integer parameter)
        $follower_stmt = $conn->prepare('INSERT INTO `followers_history` (followers,dt) VALUES (?, CURDATE())');
        if (!$follower_stmt) {
            die('Invalid query: ' . $conn->error);
        }
        $follower_stmt->bind_param('i', $followers);
        if (!$follower_stmt->execute()) {
            // Report the mysqli error; mysql_error() belongs to the removed ext/mysql.
            die('Invalid query: ' . $follower_stmt->error);
        }
        $follower_stmt->close();
    }
}
// End Get Follower Count
// Close SQL Connection
$conn->close();
?>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment