Skip to content

Instantly share code, notes, and snippets.

@tomkdickinson
Created January 5, 2017 20:19
Show Gist options
  • Save tomkdickinson/a093d30523dd77ae970f3ffcf26e1344 to your computer and use it in GitHub Desktop.
Save tomkdickinson/a093d30523dd77ae970f3ffcf26e1344 to your computer and use it in GitHub Desktop.
Followers Extraction Instagram
import json
import requests
import logging as log
# Configure logging once at import time so DEBUG-level records (and above) are emitted.
log.basicConfig(level=log.DEBUG)
class FollowerExtractor():
    """
    Extracts followers for a given profile

    On construction the extractor logs in to Instagram and keeps the returned
    CSRF token and cookie string; every later query sends them as headers.
    Override ``save_following`` / ``save_followed_by`` to persist each batch.
    """

    # Seconds to wait for any single HTTP call before giving up, so a stalled
    # connection raises instead of hanging the extraction forever.
    REQUEST_TIMEOUT = 30

    # Selection shared by the "follows" and "followed_by" queries. It closes
    # the two braces opened by the query prefix built in _relationship_params.
    _NODE_SELECTION = (
        " count,"
        " nodes {"
        " id,"
        " is_verified,"
        " followed_by_viewer,"
        " requested_by_viewer,"
        " full_name,"
        " profile_pic_url,"
        " username"
        " },"
        " page_info {"
        " end_cursor,"
        " has_next_page"
        " }"
        "}"
        " }"
    )

    def __init__(self, username, password):
        """
        Logs in and stores the session credentials on the instance.
        :param username: Instagram username used to log in
        :param password: Instagram password used to log in
        """
        self.csrf_token, self.cookie_string = FollowerExtractor.login_instagram(username, password)
        log.info("CSRF Token set to %s", self.csrf_token)
        log.info("Cookie String set to %s", self.cookie_string)

    @staticmethod
    def get_csrf_and_cookie_string():
        """
        Issues a HEAD request to the Instagram home page to obtain anonymous
        session credentials.
        :return: A tuple of (csrftoken cookie value, raw set-cookie header)
        :raises KeyError: if the response carries no csrftoken cookie or
            set-cookie header
        """
        resp = requests.head("https://www.instagram.com",
                             timeout=FollowerExtractor.REQUEST_TIMEOUT)
        return resp.cookies['csrftoken'], resp.headers['set-cookie']

    @staticmethod
    def login_instagram(username, password):
        """
        Posts the credentials to the Instagram AJAX login endpoint.
        :param username: Instagram username
        :param password: Instagram password
        :return: A tuple of (csrftoken cookie value, raw set-cookie header)
            taken from the login response
        :raises KeyError: if the login response carries no csrftoken cookie
            or set-cookie header
        """
        csrf_token, cookie_string = FollowerExtractor.get_csrf_and_cookie_string()
        data = {"username": username, "password": password}
        # No explicit content-length: a fixed value is only right for one
        # particular username/password length, and requests derives the
        # header from the encoded body itself.
        resp = requests.post("https://www.instagram.com/accounts/login/ajax/",
                             data=data,
                             headers={
                                 "referer": "https://www.instagram.com/",
                                 "accept": "*/*",
                                 "Accept-Language": "en-GB,en;q=0.8",
                                 "cache-control": "no-cache",
                                 "Content-Type": "application/x-www-form-urlencoded",
                                 "cookie": cookie_string,
                                 "origin": "https://www.instagram.com",
                                 "pragma": "no-cache",
                                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
                                 "x-csrftoken": csrf_token,
                                 "x-instagram-ajax": "1",
                                 "X-Requested-With": "XMLHttpRequest"
                             },
                             timeout=FollowerExtractor.REQUEST_TIMEOUT)
        return resp.cookies['csrftoken'], resp.headers['set-cookie']

    @staticmethod
    def _lookup_user_id(username):
        """
        Resolves a username to its user id via the profile's ``?__a=1`` JSON.
        :param username: Profile whose id is needed
        :return: The value found at ['user']['id'] in the response
        """
        resp = requests.get("https://www.instagram.com/%s?__a=1" % username,
                            timeout=FollowerExtractor.REQUEST_TIMEOUT)
        return json.loads(resp.text)['user']['id']

    def _extract_relationship(self, edge, query, save, username, user_id):
        """
        Walks every page of one relationship list.
        :param edge: Response key to read, 'followed_by' or 'follows'
        :param query: Callable (username, user_id, end_cursor) -> parsed response
        :param save: Callable invoked once per page with that page's nodes
        :param username: Profile being queried
        :param user_id: Id of the profile, or None to look it up
        :return: A list with the nodes of all pages, in the order received
        """
        if user_id is None:
            user_id = self._lookup_user_id(username)
        collected = []
        end_cursor = None  # None requests the first page
        while True:
            page = query(username, user_id, end_cursor)[edge]
            batch = page['nodes']
            save(batch)
            # Accumulate into a separate list; extending the page's own list
            # with itself would duplicate the last page and drop the others.
            collected.extend(batch)
            page_info = page['page_info']
            if not page_info['has_next_page']:
                return collected
            end_cursor = page_info['end_cursor']

    def extract_followed_by(self, username, user_id=None):
        """
        Extracts the complete followed_by list of a profile, calling
        ``save_followed_by`` once per page.
        :param username: Profile to extract from
        :param user_id: Id of the profile; looked up from username when None
        :return: A list of all followed_by nodes across every page
        """
        return self._extract_relationship('followed_by', self.query_followed_by,
                                          self.save_followed_by, username, user_id)

    def extract_following(self, username, user_id=None):
        """
        Extracts the complete follows list of a profile, calling
        ``save_following`` once per page.
        :param username: Profile to extract from
        :param user_id: Id of the profile; looked up from username when None
        :return: A list of all follows nodes across every page
        """
        return self._extract_relationship('follows', self.query_following,
                                          self.save_following, username, user_id)

    def query_following(self, username, user_id, end_cursor=None):
        """
        Requests one page of the follows list.
        :param username: Profile being queried (used for the referer header)
        :param user_id: Id of the profile
        :param end_cursor: Cursor of the previous page, or None for the first
        :return: The parsed JSON response
        """
        headers = self.get_headers("https://www.instagram.com/%s" % username)
        post_data = self.get_following_params(user_id, end_cursor)
        req = requests.post("https://www.instagram.com/query/", data=post_data,
                            headers=headers, timeout=self.REQUEST_TIMEOUT)
        return json.loads(req.text)

    def query_followed_by(self, username, user_id, end_cursor=None):
        """
        Requests one page of the followed_by list.
        :param username: Profile being queried (used for the referer header)
        :param user_id: Id of the profile
        :param end_cursor: Cursor of the previous page, or None for the first
        :return: The parsed JSON response
        """
        headers = self.get_headers("https://www.instagram.com/%s" % username)
        post_data = self.get_followed_by_params(user_id, end_cursor)
        req = requests.post("https://www.instagram.com/query/", data=post_data,
                            headers=headers, timeout=self.REQUEST_TIMEOUT)
        return json.loads(req.text)

    def get_headers(self, referrer):
        """
        Returns a bunch of headers we need to use when querying Instagram
        :param referrer: The page referrer URL
        :return: A dict of headers
        """
        return {
            "referer": referrer,
            "accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Language": "en-GB,en;q=0.8,en-US;q=0.6",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "cookie": self.cookie_string,
            "origin": "https://www.instagram.com",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/49.0.2623.87 Safari/537.36",
            "x-csrftoken": self.csrf_token,
            "x-instagram-ajax": "1",
            "X-Requested-With": "XMLHttpRequest"
        }

    @staticmethod
    def _relationship_params(edge, user_id, end_cursor=None):
        """
        Builds the form data for one page of a relationship query.
        :param edge: Relationship to query, 'followed_by' or 'follows'
        :param user_id: Id of the profile being queried
        :param end_cursor: The end cursor Instagram specifies, None for the
            first page
        :return: A dict of request parameters
        """
        if end_cursor is None:
            start_query = "ig_user(%s) { %s.first(20) {" % (user_id, edge)
        else:
            start_query = "ig_user(%s) { %s.after(%s, 20) {" % (user_id, edge, end_cursor)
        return {
            'q': start_query + FollowerExtractor._NODE_SELECTION,
            "ref": "relationships::follow_list"
        }

    @staticmethod
    def get_followed_by_params(user_id, end_cursor=None):
        """
        Returns the query params required to load next page on Instagram.
        This can be modified to return less information.
        :param user_id: Id of the profile we're querying
        :param end_cursor: The end cursor Instagram specifies
        :return: A dict of request parameters
        """
        return FollowerExtractor._relationship_params('followed_by', user_id, end_cursor)

    @staticmethod
    def get_following_params(user_id, end_cursor=None):
        """
        Returns the query params required to load next page on Instagram.
        This can be modified to return less information.
        :param user_id: Id of the profile we're querying
        :param end_cursor: The end cursor Instagram specifies
        :return: A dict of request parameters
        """
        return FollowerExtractor._relationship_params('follows', user_id, end_cursor)

    def save_following(self, following):
        """
        Called when a new batch of following users has been extracted from Instagram
        :param following: Batch of nodes returned by the follows query
        """
        for user in following:
            print("Following: %s" % user['username'])

    def save_followed_by(self, followed_by):
        """
        Called when a new batch of followed_by users has been extracted from Instagram
        :param followed_by: Batch of nodes returned by the followed_by query
        """
        for user in followed_by:
            print("Followed By: %s" % user['username'])
if __name__ == '__main__':
    # Placeholder credentials: replace with a real account before running.
    instagram_username = "your_username"
    instagram_password = "your_password"
    # Profile whose relationship lists are printed.
    target_profile = "justintimberlake"

    # Constructing the extractor performs the login.
    extractor = FollowerExtractor(instagram_username, instagram_password)
    extractor.extract_following(target_profile)
    extractor.extract_followed_by(target_profile)
@andreasasprou
Copy link

Has anyone got this working? When I run it, it manages to get the token, but I have a 403 response from the authenticated requests. Any advice?

@zm030215
Copy link

I cannot work it out either; I also get a 403 response. I assume Instagram is blocking some personal applications.

@samequefarias
Copy link

Good Morning,
I'm trying to find some algorithm in python that can collect geolocation data from instagram here in my city. Do you know any way? Thank you very much in advance.

@OwlGreenApple
Copy link

Hi, does this code still work? Instagram made some changes recently, and it now needs a variable like query_id.

@HaiGenkiDes
Copy link

@samequefarias - geolocation instagram
This is one workaround, but I'm sure there is a better way:

  1. make a call to facebook graph API with lat lon of city -> get IDs of locations
    https://graph.facebook.com/search?q=&type=place&center=51.5074,-0.1278&distance=5000&access_token=ACCESS-TOKEN
  2. make a call to the Instagram API with the Facebook location IDs -> get Instagram location IDs
    https://api.instagram.com/v1/locations/search?facebook_places_id=273471170716&access_token=ACCESS-TOKEN
  3. make a call to instagram api with location ID -> get recent media
    https://api.instagram.com/v1/locations/30824484/media/recent?access_token=ACCESS-TOKEN

@RomanKlimov
Copy link

How did you fix it? I have the same error now.

@fnbrs
Copy link

fnbrs commented May 5, 2018

It seems I can't even log in.

send: b'password=string&username=string'
reply: 'HTTP/1.1 403 Forbidden\r\n'

What am I doing wrong?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment