Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Followers Extraction Instagram
import json
import requests
import logging as log
log.basicConfig(level=log.DEBUG)
class FollowerExtractor():
    """
    Extracts followers for a given profile

    Logging in happens on construction; the CSRF token and cookie string from
    the login response are stored on the instance and reused by every query.

    NOTE(review): this talks to Instagram's web endpoints ("/query/" and
    "?__a=1") rather than a documented API - presumably they can change
    without notice; verify they still respond as expected before relying on it.
    """

    # Field selection shared by the "followed_by" and "follows" queries. It
    # closes the two braces opened by the query prefix built in
    # _relationship_params.
    _NODE_SELECTION = (
        " count,"
        " nodes {"
        " id,"
        " is_verified,"
        " followed_by_viewer,"
        " requested_by_viewer,"
        " full_name,"
        " profile_pic_url,"
        " username"
        " },"
        " page_info {"
        " end_cursor,"
        " has_next_page"
        " }"
        "}"
        " }"
    )

    def __init__(self, username, password):
        """
        Logs in and stores the session credentials on the instance
        :param username: Instagram user name to log in with
        :param password: Password for that user
        """
        self.csrf_token, self.cookie_string = FollowerExtractor.login_instagram(username, password)
        # NOTE: both values are session credentials and end up in the log output.
        log.info("CSRF Token set to %s", self.csrf_token)
        log.info("Cookie String set to %s", self.cookie_string)

    @staticmethod
    def get_csrf_and_cookie_string():
        """
        Fetches an anonymous CSRF token and cookie string from the home page
        :return: A tuple of (csrf token, raw set-cookie header value)
        """
        resp = requests.head("https://www.instagram.com")
        return resp.cookies['csrftoken'], resp.headers['set-cookie']

    @staticmethod
    def login_instagram(username, password):
        """
        Posts the credentials to the ajax login endpoint
        :param username: Instagram user name to log in with
        :param password: Password for that user
        :return: A tuple of (csrf token, raw set-cookie header value) taken from the login response
        """
        csrf_token, cookie_string = FollowerExtractor.get_csrf_and_cookie_string()
        data = {"username": username, "password": password}
        # No explicit content-length header here: its value depends on the
        # credentials, and requests derives it from the encoded form body.
        resp = requests.post("https://www.instagram.com/accounts/login/ajax/",
                             data=data,
                             headers={
                                 "referer": "https://www.instagram.com/",
                                 "accept": "*/*",
                                 "Accept-Language": "en-GB,en;q=0.8",
                                 "cache-control": "no-cache",
                                 "Content-Type": "application/x-www-form-urlencoded",
                                 "cookie": cookie_string,
                                 "origin": "https://www.instagram.com",
                                 "pragma": "no-cache",
                                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36",
                                 "x-csrftoken": csrf_token,
                                 "x-instagram-ajax": "1",
                                 "X-Requested-With": "XMLHttpRequest"
                             })
        return resp.cookies['csrftoken'], resp.headers['set-cookie']

    def extract_followed_by(self, username, user_id=None):
        """
        Extracts every user who follows the given profile, page by page.
        Each page is handed to save_followed_by as soon as it arrives.
        :param username: Profile whose followers are extracted
        :param user_id: Numeric id of the profile; looked up from the username when omitted
        :return: A list with the user nodes of all pages
        """
        if user_id is None:
            user_id = self._lookup_user_id(username)
        return self._collect_pages('followed_by', self.query_followed_by, self.save_followed_by,
                                   username, user_id)

    def extract_following(self, username, user_id=None):
        """
        Extracts every user the given profile follows, page by page.
        Each page is handed to save_following as soon as it arrives.
        :param username: Profile whose followed users are extracted
        :param user_id: Numeric id of the profile; looked up from the username when omitted
        :return: A list with the user nodes of all pages
        """
        if user_id is None:
            user_id = self._lookup_user_id(username)
        return self._collect_pages('follows', self.query_following, self.save_following,
                                   username, user_id)

    @staticmethod
    def _lookup_user_id(username):
        """
        Resolves a username to the user id found in the profile's JSON
        :param username: Profile to look up
        :return: The value of ['user']['id'] in the profile JSON
        """
        # Trailing slash before the query string: the slash-less form is
        # answered with a 301 redirect first.
        resp = requests.get("https://www.instagram.com/%s/?__a=1" % username)
        return json.loads(resp.text)['user']['id']

    @staticmethod
    def _collect_pages(edge, query_page, save_batch, username, user_id):
        """
        Walks all pages of one relationship list and accumulates the nodes
        :param edge: Key of the relationship in the response ('followed_by' or 'follows')
        :param query_page: Callable(username, user_id, end_cursor) returning one decoded response
        :param save_batch: Callable invoked with the nodes of each page
        :param username: Profile being queried
        :param user_id: Id of the profile being queried
        :return: A list with the nodes of every page, in the order received
        """
        collected = []
        end_cursor = None
        while True:
            resp = query_page(username, user_id, end_cursor)
            edge_data = resp.get(edge) or {}
            nodes = edge_data.get('nodes')
            if nodes is None:
                # The endpoint can answer with only a count and no node list;
                # treat that as "nothing more to read" instead of a KeyError.
                log.warning("No '%s' nodes in response for %s; stopping", edge, username)
                break
            save_batch(nodes)
            collected.extend(nodes)
            page_info = edge_data.get('page_info') or {}
            end_cursor = page_info.get('end_cursor')
            # Without a cursor the next request would fetch the first page
            # again, so stop even if has_next_page is set.
            if not page_info.get('has_next_page') or not end_cursor:
                break
        return collected

    def query_following(self, username, user_id, end_cursor=None):
        """
        Requests one page of the users the profile follows
        :param username: Profile being queried (used for the referer header)
        :param user_id: Id of the profile being queried
        :param end_cursor: Cursor of the previous page, or None for the first page
        :return: The decoded JSON response
        """
        return self._post_query(username, self.get_following_params(user_id, end_cursor))

    def query_followed_by(self, username, user_id, end_cursor=None):
        """
        Requests one page of the users following the profile
        :param username: Profile being queried (used for the referer header)
        :param user_id: Id of the profile being queried
        :param end_cursor: Cursor of the previous page, or None for the first page
        :return: The decoded JSON response
        """
        return self._post_query(username, self.get_followed_by_params(user_id, end_cursor))

    def _post_query(self, username, post_data):
        """
        Posts a query to the /query/ endpoint with the session headers
        :param username: Profile being queried (used for the referer header)
        :param post_data: Form parameters of the query
        :return: The decoded JSON response
        """
        headers = self.get_headers("https://www.instagram.com/%s" % username)
        resp = requests.post("https://www.instagram.com/query/", data=post_data, headers=headers)
        return json.loads(resp.text)

    def get_headers(self, referrer):
        """
        Returns a bunch of headers we need to use when querying Instagram
        :param referrer: The page referrer URL
        :return: A dict of headers
        """
        return {
            "referer": referrer,
            "accept": "application/json, text/javascript, */*; q=0.01",
            "Accept-Language": "en-GB,en;q=0.8,en-US;q=0.6",
            "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
            "cookie": self.cookie_string,
            "origin": "https://www.instagram.com",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/49.0.2623.87 Safari/537.36",
            "x-csrftoken": self.csrf_token,
            "x-instagram-ajax": "1",
            "X-Requested-With": "XMLHttpRequest"
        }

    @staticmethod
    def get_followed_by_params(user_id, end_cursor=None):
        """
        Returns the query params required to load a page of followers.
        This can be modified to return less information.
        :param user_id: Id of the user we're querying
        :param end_cursor: The end cursor Instagram specifies
        :return: A dict of request parameters
        """
        return FollowerExtractor._relationship_params('followed_by', user_id, end_cursor)

    @staticmethod
    def get_following_params(user_id, end_cursor=None):
        """
        Returns the query params required to load a page of followed users.
        This can be modified to return less information.
        :param user_id: Id of the user we're querying
        :param end_cursor: The end cursor Instagram specifies
        :return: A dict of request parameters
        """
        return FollowerExtractor._relationship_params('follows', user_id, end_cursor)

    @staticmethod
    def _relationship_params(edge, user_id, end_cursor=None):
        """
        Builds the form parameters for one page (20 entries) of a relationship list
        :param edge: Relationship to query ('followed_by' or 'follows')
        :param user_id: Id of the user we're querying
        :param end_cursor: The end cursor Instagram specifies, or None for the first page
        :return: A dict of request parameters
        """
        if end_cursor is None:
            start_query = "ig_user(%s) { %s.first(20) {" % (user_id, edge)
        else:
            start_query = "ig_user(%s) { %s.after(%s, 20) {" % (user_id, edge, end_cursor)
        return {
            'q': start_query + FollowerExtractor._NODE_SELECTION,
            "ref": "relationships::follow_list"
        }

    def save_following(self, following):
        """
        Called when a new batch of following users has been extracted from Instagram
        :param following: Users the profile is following
        """
        for user in following:
            print("Following: %s" % user['username'])

    def save_followed_by(self, followed_by):
        """
        Called when a new batch of followed_by users has been extracted from Instagram
        :param followed_by: Users who follow the profile
        """
        for user in followed_by:
            print("Followed By: %s" % user['username'])
if __name__ == '__main__':
    # Demo run: log in with the placeholder credentials, then list who the
    # target profile follows and who follows it.
    target_profile = "justintimberlake"
    extractor = FollowerExtractor("your_username", "your_password")
    extractor.extract_following(target_profile)
    extractor.extract_followed_by(target_profile)
@diegofcoelho

This comment has been minimized.

Copy link

commented Jan 5, 2017

Writing here just to make it easier, but the result was the same as before.
Maybe some country-related restriction? I tried logging in with different accounts (not all mine) and it did not work.

`

DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTPS connection (1): www.instagram.com
DEBUG:requests.packages.urllib3.connectionpool:https://www.instagram.com:443 "HEAD / HTTP/1.1" 200 0
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTPS connection (1): www.instagram.com
DEBUG:requests.packages.urllib3.connectionpool:https://www.instagram.com:443 "POST /accounts/login/ajax/ HTTP/1.1" 200 59
INFO:root:CSRF Token set to XXXXXXXXXX
INFO:root:Cookie String set to csrftoken= XXXXXXXXXXX; expires=Thu, 04-Jan-2018 20:32:04 GMT; Max-Age=31449600; Path=/; secure
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTPS connection (1): www.instagram.com
DEBUG:requests.packages.urllib3.connectionpool:https://www.instagram.com:443 "GET /dfcoelho?__a=1 HTTP/1.1" 301 0
DEBUG:requests.packages.urllib3.connectionpool:https://www.instagram.com:443 "GET /dfcoelho/?__a=1 HTTP/1.1" 200 414
DEBUG:requests.packages.urllib3.connectionpool:Starting new HTTPS connection (1): www.instagram.com
DEBUG:requests.packages.urllib3.connectionpool:https://www.instagram.com:443 "POST /query/ HTTP/1.1" 200 43

{"status": "ok", "follows": {"count": 394}}

Traceback (most recent call last):
  File "C:/Users/dfcoelho/Documents/GitHub/ds/dev/beta.py", line 198, in <module>
    followed_extractor.extract_following("dfcoelho")
  File "C:/Users/dfcoelho/Documents/GitHub/ds/dev/beta.py", line 65, in extract_following
    followers = resp['follows']['nodes']
KeyError: 'nodes'

`

@tomkdickinson

This comment has been minimized.

Copy link
Owner Author

commented Jan 5, 2017

It's using Python 3, and requests 2.12.4.

The issue I see with that though, is you're trying to extract from (your?) private account. Are the users you're logging in with, following you?

@diegofcoelho

This comment has been minimized.

Copy link

commented Jan 5, 2017

Yep, I am following them and vice versa.
Another account I am checking is even public.
So I logged in with the public account and tried to get its own users that way, with no success.

Crazy, hah?

Here I am on Python 3.5, same request

One thing that bugs me is that in the response (html) this string appears:

"

This page could not be loaded. If you have cookies disabled in your browser, or you are browsing in Private Mode, please try enabling cookies or turning off Private Mode, and then retrying your action.

"

How come?

@tomkdickinson

This comment has been minimized.

Copy link
Owner Author

commented Jan 5, 2017

Hmm, that almost sounds like it's not sending the cookie string to the server.

Try adding this to the top of the script:

import http.client as http_client
http_client.HTTPConnection.debuglevel = 1

It should log the headers. See if a cookie header is being sent after it's logged in.

@diegofcoelho

This comment has been minimized.

Copy link

commented Jan 5, 2017

Until the login is fine:

header: Set-Cookie header: Content-Language header: Expires header: Pragma header: Vary header: Content-Type header: Content-Encoding header: Cache-Control header: Strict-Transport-Security header: Date header: X-Frame-Options header: Set-Cookie header: Set-Cookie header: Connection header: Content-Length send: b'POST /accounts/login/ajax/ HTTP/1.1\r\nHost: www.instagram.com\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36\r\nAccept-Encoding: gzip, deflate\r\naccept: */*\r\nConnection: keep-alive\r\nreferer: https://www.instagram.com/justintimberlake/\r\nAccept-Language: en-GB,en;q=0.8\r\ncache-control: no-cache\r\nContent-Length: 42\r\nContent-Type: application/x-www-form-urlencoded\r\ncookie: sessionid=; expires=Thu, 01-Jan-1970 00:00:00 GMT; Max-Age=0; Path=/; HttpOnly; Domain=instagram.com, mid= **YYYY** ; expires=Wed, 31-Dec-2036 22:06:30 GMT; Max-Age=630720000; Path=/, csrftoken= **XXXX** ; expires=Thu, 04-Jan-2018 22:06:30 GMT; Max-Age=31449600; Path=/; Secure\r\norigin: https://www.instagram.com\r\npragma: no-cache\r\nx-csrftoken: **XXXX** \r\nx-instagram-ajax: 1\r\nX-Requested-With: XMLHttpRequest\r\n\r\n'

send: b'username=U&password=P'

reply: 'HTTP/1.1 200 OK\r\n'

Then this appears:

## reply: 'HTTP/1.1 301 Moved Permanently\r\n'

But apparently it sent the POST fine:

header: Strict-Transport-Security header: Content-Type header: Vary header: Content-Encoding header: Cache-Control header: Expires header: Content-Language header: Date header: Pragma header: Set-Cookie header: Set-Cookie header: Set-Cookie header: Connection header: Content-Length send: b'POST /query/ HTTP/1.1\r\nHost: www.instagram.com\r\nUser-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.87 Safari/537.36\r\nAccept-Encoding: gzip, deflate\r\naccept: application/json, text/javascript, */*; q=0.01\r\nConnection: keep-alive\r\nreferer: https://www.instagram.com/justintimberlake\r\nAccept-Language: en-GB,en;q=0.8,en-US;q=0.6\r\nContent-Type: application/x-www-form-urlencoded; charset=UTF-8\r\ncookie: ds_user_id=3667668898; expires=Wed, 05-Apr-2017 22:14:58 GMT; Max-Age=7776000; Path=/, csrftoken=**XXXX**; expires=Thu, 04-Jan-2018 22:14:58 GMT; Max-Age=31449600; Path=/; Secure, target=""; expires=Thu, 01-Jan-1970 00:00:00 GMT; Max-Age=0; Path=/, sessionid=**ZZZ**; expires=Wed, 05-Apr-2017 22:14:58 GMT; HttpOnly; Max-Age=7776000; Path=/; Secure\r\norigin: https://www.instagram.com\r\nx-csrftoken: **XXXX**\r\nx-instagram-ajax: 1\r\nX-Requested-With: XMLHttpRequest\r\nContent-Length: 307\r\n\r\n'

@tomkdickinson

This comment has been minimized.

Copy link
Owner Author

commented Jan 7, 2017

Not sure if it's the redirect that's causing it. I get that as well, and my response is fine.

I'll see if I can replicate your issue over the weekend. It could be a geo location thing, so I'll try it out on a VPN as well.

@diegofcoelho

This comment has been minimized.

Copy link

commented Jan 7, 2017

The 301 was due to a couple of missing slashes. I tried to submit a fix to you showing where, but I am not sure if it was pushed properly (I have actually never collaborated on anything on GitHub).

I tried using a VPN to the UK and got the same results. I also tried on Linux. Same. Maybe a user geotag? I might try creating a user through the VPN and testing.

@andreasasprou

This comment has been minimized.

Copy link

commented Feb 23, 2017

Has anyone got this working? When I run it, it manages to get the token, but I have a 403 response from the authenticated requests. Any advice?

@zm030215

This comment has been minimized.

Copy link

commented Mar 28, 2017

I cannot get it to work either; I also got a 403 response. I assume Instagram is blocking some personal applications.

@samequefarias

This comment has been minimized.

Copy link

commented Jun 22, 2017

Good Morning,
I'm trying to find some algorithm in python that can collect geolocation data from instagram here in my city. Do you know any way? Thank you very much in advance.

@OwlGreenApple

This comment has been minimized.

Copy link

commented Jul 4, 2017

Hi, does this code still work? Instagram made some changes recently, and it now needs a variable like query_id.

@HaiGenkiDes

This comment has been minimized.

Copy link

commented Nov 9, 2017

@samequefarias - geolocation instagram
This is one workaround but i'm sure there is a better way,

  1. make a call to facebook graph API with lat lon of city -> get IDs of locations
    https://graph.facebook.com/search?q=&type=place&center=51.5074,-0.1278&distance=5000&access_token=ACCESS-TOKEN
  2. make a call to the Instagram API with Facebook location IDs -> get Instagram location IDs
    https://api.instagram.com/v1/locations/search?facebook_places_id=273471170716&access_token=ACCESS-TOKEN
  3. make a call to instagram api with location ID -> get recent media
    https://api.instagram.com/v1/locations/30824484/media/recent?access_token=ACCESS-TOKEN
@yadkit

This comment has been minimized.

Copy link

commented Apr 14, 2018

__a=1 URI is fixed.

@RomanKlimov

This comment has been minimized.

Copy link

commented Apr 15, 2018

How did you fix it? I have the same error now.

@fnbrs

This comment has been minimized.

Copy link

commented May 5, 2018

Seems i can't even log in.

send: b'password=string&username=string'
reply: 'HTTP/1.1 403 Forbidden\r\n'

What am I doing wrong?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.