-
-
Save salomonjohns/28ab403e1bf7f7787dd20ffc87ec1c0a to your computer and use it in GitHub Desktop.
Quick-and-dirty Instagram web scrape, modified to collect photo codes, lists and comment count, as well as a few other metrics. SQL code in comments.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
// Instagram profile scraper: collects per-post like/comment counts and the
// follower count of a public account and stores them in MySQL.
//
// There are probably better ways to do this.
// http://www.salomonjohns.com will probably maintain updates to this as it changes
// and incorporates chart.js or a similar feature set.
// I am not the creator of this script; I just made changes to it and tied it to MySQL.
// Tip: http://www.jsoneditoronline.org/ makes it easier to find the JSON fields you are looking for.

/* SQL Setup

-- Adminer 3.3.3 MySQL dump

SET NAMES utf8;
SET foreign_key_checks = 0;
SET time_zone = 'SYSTEM';
SET sql_mode = 'NO_AUTO_VALUE_ON_ZERO';

CREATE TABLE `followers_history` (
  `followers` mediumint(9) NOT NULL,
  `dt` date NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

CREATE TABLE `ig_content` (
  `postcode` varchar(32) NOT NULL,
  `likes` mediumint(9) NOT NULL,
  `comments` mediumint(9) NOT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- 2017-01-20 15:28:11
*/

// Database connection settings. Replace the placeholders with real credentials.
$servername = "localhost";
$username = "XXXXXXXXXXXXX";
$password = "XXXXXXXXXXXXX";
$dbname = "ig";

// Create connection
$conn = new mysqli($servername, $username, $password, $dbname);

// Check connection: stop the script and print the driver's message on failure.
if ($conn->connect_error) {
    die("Connection failed: " . $conn->connect_error);
}
/**
 * Fetch a non-private Instagram profile page and return the JSON state
 * embedded in it as `window._sharedData`.
 *
 * @param string $username Account name; URL-encoded before being appended to $base_url.
 * @param string $base_url Location the account name is appended to. Defaults to the
 *                         public Instagram site; any readable stream prefix works.
 * @return array|null Decoded JSON as an associative array, or null when the page
 *                    could not be fetched, the marker is missing, or the JSON is invalid.
 */
function scrape_insta($username, $base_url = 'https://www.instagram.com/') {
    // Encode the name so it cannot alter the path or query of the request.
    $insta_source = file_get_contents($base_url . rawurlencode($username));
    if ($insta_source === false) {
        return null;
    }

    // Everything after the first marker starts with the JSON document.
    $shards = explode('window._sharedData = ', $insta_source);
    if (count($shards) < 2) {
        return null;
    }

    // The JSON ends where the enclosing <script> element is closed.
    $insta_json = explode(';</script>', $shards[1], 2);
    $insta_array = json_decode($insta_json[0], TRUE);

    // json_decode() yields null on malformed input; scalars are not useful to callers either.
    return is_array($insta_array) ? $insta_array : null;
}
//Supply a username
$my_account = 'xxxxxxxx';

//Do the deed
$results_array = scrape_insta($my_account);

// Without a user record there is nothing to store; stop instead of writing empty rows.
if (!isset($results_array['entry_data']['ProfilePage'][0]['user'])) {
    $conn->close();
    die("No profile data found for the requested account.<br />\n");
}
$profile = $results_array['entry_data']['ProfilePage'][0]['user'];

// Begin Get New Photos
// Collects metrics on the photos recently posted (limited to 10)
$nodes = array();
if (isset($profile['media']['nodes']) && is_array($profile['media']['nodes'])) {
    // Only iterate over nodes that actually exist, capped at the 10 most recent.
    $nodes = array_slice($profile['media']['nodes'], 0, 10);
}

// Prepared statements keep scraped values out of the SQL text.
// Exact comparison (=) is used because LIKE would treat "_" in a post code as a wildcard.
$find_post = $conn->prepare('SELECT 1 FROM `ig_content` WHERE postcode = ? LIMIT 1');
$add_post = $conn->prepare('INSERT INTO `ig_content` (postcode,likes,comments) VALUES (?,?,?)');
if (!$find_post || !$add_post) {
    die('Invalid query: ' . $conn->error);
}

foreach ($nodes as $latest_array) {
    if (!isset($latest_array['code'])) {
        // A node without a post code cannot be identified or de-duplicated.
        continue;
    }
    $code = (string) $latest_array['code'];
    $likes = isset($latest_array['likes']['count']) ? (int) $latest_array['likes']['count'] : 0;
    $comments = isset($latest_array['comments']['count']) ? (int) $latest_array['comments']['count'] : 0;

    // The post code comes from a remote page, so escape it before printing as HTML.
    echo 'ID: ' . htmlspecialchars($code, ENT_QUOTES, 'UTF-8') . "<br />\n";
    echo 'Likes: ' . $likes . "<br />\n";
    echo 'Comments: ' . $comments . "<br />\n";

    // Check if photo exists in DB
    $find_post->bind_param('s', $code);
    if (!$find_post->execute()) {
        die('Invalid query: ' . $find_post->error);
    }
    $find_post->store_result();
    $already_stored = $find_post->num_rows > 0;
    $find_post->free_result();

    if ($already_stored) {
        echo "Skipping, already present...<br />\n";
        continue;
    }

    // Insert Data Into Database
    $add_post->bind_param('sii', $code, $likes, $comments);
    if (!$add_post->execute()) {
        die('Invalid query: ' . $add_post->error);
    }
}
$find_post->close();
$add_post->close();
// End Get New Photos

// Get the follower count
$followers = isset($profile['followed_by']['count']) ? (int) $profile['followed_by']['count'] : null;

if ($followers === null) {
    echo "Follower count not available, skipping...<br />\n";
} else {
    echo 'Followers: ' . $followers . "<br />\n";

    // Make sure today's value doesn't exist yet. CURDATE() is used for both the
    // check and the insert so the two always agree on what "today" is.
    $today_rows = $conn->query('SELECT 1 FROM `followers_history` WHERE dt = CURDATE() LIMIT 1');
    if (!$today_rows) {
        die('Invalid query: ' . $conn->error);
    }
    $already_stored = $today_rows->num_rows > 0;
    $today_rows->free();

    if ($already_stored) {
        echo "Skipping, already present...<br />\n";
    } else {
        // SQL to insert values
        $add_followers = $conn->prepare('INSERT INTO `followers_history` (followers,dt) VALUES (?, CURDATE())');
        if (!$add_followers) {
            die('Invalid query: ' . $conn->error);
        }
        $add_followers->bind_param('i', $followers);
        if (!$add_followers->execute()) {
            die('Invalid query: ' . $add_followers->error);
        }
        $add_followers->close();
    }
}
// End Get Follower Count

// Close SQL Connection
$conn->close();
?>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment