Created
January 14, 2020 15:59
-
-
Save boogheta/fe8f6e397ca4268e12d2e31ee741ba4c to your computer and use it in GitHub Desktop.
Collect users and followers on Instagram with fake InstagramAPI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys, json | |
from InstagramAPI import InstagramAPI | |
from time import sleep | |
from pprint import pprint | |
#import pandas as pd | |
#import requests, urllib, bs4, ssl, json, sys, re | |
def connect_api(user, password):
    """Log in to Instagram and return the connected API client.

    A falsy return value from ``api.login()`` is treated as a failed attempt.
    After a failure the function waits 5 seconds and tries again, up to 12
    retries after the initial attempt.

    Args:
        user: Instagram account name used to log in.
        password: Password of that account.

    Returns:
        The ``InstagramAPI`` instance on which ``login()`` succeeded.

    Raises:
        SystemExit: with a non-zero status when every attempt failed.
    """
    api = InstagramAPI(user, password)
    retries = 12
    logged_in = api.login()
    while not logged_in and retries > 0:
        sleep(5)
        retries -= 1
        logged_in = api.login()
    # Test the login outcome itself: the client object is always truthy, so
    # checking `api` can never detect a failed connection.
    if not logged_in:
        # The password is deliberately left out of the message so credentials
        # do not end up in terminals or log files.
        print("Impossible to connect to Instagram with account %s" % user, file=sys.stderr)
        sys.exit(1)
    return api
def collect_users(api, list_users, delay=5, max_retries=12):
    """Collect profile information for each user id, with an on-disk cache.

    Raw API answers are cached in ``users_info.json`` (current directory),
    which is rewritten after every successful fetch. Ids already in the cache
    are served from it without calling the API.

    Args:
        api: Client exposing ``getUsernameInfo(user_id)`` and a ``LastJson``
            attribute holding the decoded answer of the last call.
        list_users: Iterable of user ids to look up.
        delay: Seconds to wait after a failed attempt and after each
            successful request.
        max_retries: Maximum number of attempts per user.

    Returns:
        A list with one summary dict per collected user, in input order.
        Collection stops at the first user that cannot be fetched, so the
        list may be shorter than ``list_users``.
    """
    try:
        with open("users_info.json") as f:
            fulldata = json.load(f)
    except (OSError, ValueError):
        # Missing, unreadable or corrupt cache: start from scratch.
        fulldata = {}
    accounts = []
    for user_id in list_users:
        print("WORKING on", user_id)
        # JSON object keys are always strings, so the cache must be indexed
        # with the string form of the id or a reloaded cache never matches.
        cache_key = str(user_id)
        if cache_key in fulldata:
            content = fulldata[cache_key]
        else:
            content = {}
            retries = max_retries
            while retries > 0:
                api.getUsernameInfo(user_id)
                content = api.LastJson
                if 'user' in content:
                    break
                sleep(delay)
                retries -= 1
            if 'user' not in content:
                print("Impossible to collect user %s" % (user_id), file=sys.stderr)
                pprint(content)
                break
            fulldata[cache_key] = content
            with open("users_info.json", "w") as f:
                json.dump(fulldata, f)
            # Throttle only after a real request; cache hits cost nothing.
            sleep(delay)
        user = content['user']
        accounts.append({
            'user_id': user_id,
            'username': user['username'],
            'nb_posts': user['media_count'],
            'nb_followers': user['follower_count'],
            'nb_following': user['following_count'],
            'biography': user['biography'],
            'business_account': user['is_business'],
            'external_url': user['external_url'],
            'email': user.get('email'),
        })
    return accounts
def collect_followers(api, list_users, delay=5, max_retries=12):
    """Collect the followers of each user id, page by page, resumably.

    Progress is stored in ``followers_info.json`` (current directory) after
    every page, shaped as::

        {"<user_id>": {"next_max_id": <cursor or false>, "followers": [...]}}

    A falsy ``next_max_id`` marks a fully collected user. On a later run such
    users are served from the cache, and partially collected ones resume from
    the stored cursor.

    Args:
        api: Client exposing ``getUserFollowers(user_id, maxid=...)`` and a
            ``LastJson`` attribute holding the decoded answer of the last
            call.
        list_users: Iterable of user ids whose followers are wanted.
        delay: Seconds to wait after a failed attempt and after each page.
        max_retries: Maximum number of attempts per page.

    Returns:
        A dict mapping each id of ``list_users`` to its list of followers.

    Raises:
        SystemExit: with a non-zero status when a page keeps failing.
    """
    cache_file = "followers_info.json"
    try:
        with open(cache_file) as f:
            fulldata = json.load(f)
    except (OSError, ValueError):
        # Missing, unreadable or corrupt cache: start from scratch.
        fulldata = {}
    user_followers = {}
    for user_id in list_users:
        print("WORKING on", user_id)
        # JSON object keys are always strings, so index the cache with the
        # string form of the id to make reloaded entries match.
        cache_key = str(user_id)
        if cache_key in fulldata:
            followers = fulldata[cache_key]["followers"]
            next_max_id = fulldata[cache_key]["next_max_id"]
        else:
            followers = []
            # True is a sentinel meaning "no page fetched yet".
            next_max_id = True
            fulldata[cache_key] = {
                "next_max_id": True,
                "followers": followers,
            }
        # Fully cached users (falsy cursor) skip this loop but are still
        # reported in the result below.
        while next_max_id:
            if next_max_id is True:
                next_max_id = ''
            retries = max_retries
            fail = True
            content = {}
            while fail and retries > 0:
                api.getUserFollowers(user_id, maxid=next_max_id)
                content = api.LastJson
                fail = "status" in content and content["status"] == "fail"
                if fail:
                    sleep(delay)
                    retries -= 1
            if fail:
                print("Impossible to collect followers for user %s after %s" % (user_id, next_max_id), file=sys.stderr)
                pprint(content)
                sys.exit(1)
            followers.extend(content.get("users", []))
            next_max_id = content.get("next_max_id", False)
            fulldata[cache_key]["followers"] = followers
            fulldata[cache_key]["next_max_id"] = next_max_id
            # Persist to the same file that is read at start-up so that an
            # interrupted run can resume (and the users cache is not clobbered).
            with open(cache_file, "w") as f:
                json.dump(fulldata, f)
            sleep(delay)
        user_followers[user_id] = followers
    return user_followers
def store_accounts_as_csv(accounts, filename):
    """Write account summaries to a semicolon-separated CSV file.

    Args:
        accounts: List of dicts, one per account. Columns are the union of
            the dict keys, in order of first appearance.
        filename: Path of the CSV file to create or overwrite.

    A header row is written first. Keys missing from a row and ``None``
    values are written as empty cells. With an empty ``accounts`` list the
    file is created empty.
    """
    # Local import: csv is only needed by this function.
    import csv

    fieldnames = []
    for account in accounts:
        for key in account:
            if key not in fieldnames:
                fieldnames.append(key)
    # newline="" lets the csv module control line endings itself; UTF-8 keeps
    # non-ASCII text (e.g. biographies) intact.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        if not fieldnames:
            return
        writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter=";")
        writer.writeheader()
        writer.writerows(accounts)
# Numeric ids handed to collect_users() by the (currently commented-out) call
# near the end of the script; presumably Instagram account ids -- TODO confirm.
list_ids = [337971045,10173172837,5708920156,12537108438,18341208002,18590139197,19297143602,515814807,1395905971,7072598564,857128325,49299102,2945198438,4420087292,21438298166,19984486527,1906240752,7070095870,2579722126,295635844,22904886662,3644338541,3602461392,23292267332,3508969231,19609124215,3446732741,2977775554,1620762013,7332266442,8142428011,21956610305,21531584620,7019467857,9035517470,5534057213,2304132968,8782062667,2694726042,10096501877,258193824,18753220956,1523148837,263866558,2081033702,38502642,13220407486,4562817158,4155110032,15762558953,12041648666,337202992,19555638099,5915281563,1277552728,2025431878,22604627105,12605844,1499137365,7030432419,21606357780,9284662114,3118527987,6632870837,3593864527,5454766143,16169897513,3471096946,733557832,595693700,185782160,9958639815,6619855531,6203997062,3622616553,4590712874,16661568596,6950490907,19939252778,20292931079,8618584510,180609477,2464340484,190621442,3631493356,11165539607,285795392,8419252313,1949248362,8926687023,4575581534,3892134892,16129318075,1825132273,177293615,14646254128,5464770900,8118628539,381081801,7459551266,7529634962,298848606,17641402987,4015260548,1187715018,6635654389,1550869072,19294421450,3941464878,10145861788]
#list2 = [11825812741,3170312929,4057419877,200133817,3435329739,8187844022,18884944773,2023208616,21997825236,1675249774,14982482,16617613746,6320112056,1787954632,7797163837,1420206756,18886889042,21509476686,8935229467,2008612110,10949652797,1953815149,7599797254,1679941698,5595149649,4233705265,2257577385,14297220433,2289239013,1782277430,11472918196,7383070628,4345576080,11877883391,11324818653,7925783270,21810322655,1426197079,5346012429,12784632805,341383180,7284285302,1986471972,9236083364,5621927646,1136819870,4160132196,8443960171,7878952048,1465781351,6237203402,12275018823,2125958516,3670563541,298756519,1408181630,3769751466,6229597236,1637920381,21237654260,15475791164,19444379223,2011666202,1928368037,3881198522,4004019915,11804397522,590441685,470739052,15410611763,2148687548,2159312097,36957963,13233898933,5495650,19581047063,6725488025,1744975479,6005416110,208861550,4060822521,1835779221,6871689508,7156904617,4324684081,1421059539,25715676,3261356864,7463773711,356433855,7958875238,2893288415,8934390122,199730470,10699390142,5449373771,12597970216,8274726467,1536323358,1429766734,18770400397,2085315863,4008779555,21446031699,10992854930,18645601714,12256128791,3505430247,9130591321,3268276,21262772807,3938612394,7175872002,2997977399,3537804985,16919267,3595965947,1829188208,11578348657,22423436365,54429165,4779068551,22718812402,3582172948,183286468,7305708339,6459632134,18871270208,17473435497,422794975,10895097220,5590709634,3315515018,1652500545,1997477498,2165952902,4165752894,14052972865,4148109313,13682136332,5894257320,7324486058,7404535024,214660632,19257734797,1537912168,249011472,6921660787,1244671760,1201345996,173644312,2090425980,3268618703,7318241862,21786344886,10866067911,585918169,1061865823,14410394212,19568767716,8503143014,7629393071,21674702302,2298114350,18800844274,10004813655,18018069342,13065613071,227832857,2070906273,3556242227,46715662,18009410251,5351318501,2876979029,18651327130,4776521177,21056955404,20258665363,399
190308,5478787190,1994625751,11381482217,1436897537,7533513091,22181469195,20482275640,4213318973,7138251166,10902388306,19391434217,17225835759,20680281814,190568565,8519633566,4739412020,1824602793,1662357475,187842531,6214962940,13345126003,250585860,5580046326,7038737840,1482956843,1900772090,20008267221,2343688727,19211902931,484501085,12990159032,17645570698,8679648861,8177875018,370580726,11930791238,9138285120,20217218708,4145740402,6116180985,21918275413,11141005169,17288380807,14583201468,6020537746,11875170336,22705015388,4360363982,1732676087,18119295638,5809775680,23921138937,6640747203,19544852488,9376559078,9301216217,3271894668,5386987039,16469109866,1667922900,2009732757,4050478037,3057947313,7483342664,17087945209,2365713725,8944487716,1649863114,8657591059,3919764334,3888326697,8081141162,8006372831,4091842663,6021346931,8216508673,9023085638,1545982006,13765331082,21679112557,2702460094,2221070295,7057003987,8661052357,19092214883,4516208636,3485546446,326527422,3172958006,21734572830,13816058779,18530528749,7151631020,264948968,2922546436,9207738136,6086815792,238047472,3448242177,18745178554,1543667327,1070417878,11459457105,8234749648,7230644682,6105478177,7651257434,1008276905,14037201094,6717190954,8581412984,4276757483,21004850582,541808129,15920849975,4555914280,1790743870,12370732144,6866081026,2944773028,3141702123,11774810260,20344154957,2241126438,10167077288,1540479555,1700279093,1283109311,3013626593,1112915592,2104411965,16965142395,2011249859,37633326,248182511,3015785380,21976848639,8004094586,11651845564,20738590780,1563580677,581389911,18785495597,8649939925,4338377164,3134119300,7335868351,574188184,3926342822,15437636037,2950470899,24267802041,2288974491,10380175853,10718818038,4357321010,4480378892,6894234851,13823260935,4628890860,11141842932,183251821,18900242840,19706256478,32239496,497595819,9614218593,18841534112,1098188697,4060626572,15750438462,9781767448,2233178158,3179610755,3052163922,5055280170,20736104329,54022225
52,208749462,1471183535,4899517131,15413328726,7521303771,1069468357,7633889076,1952490812,15895941584,2526345498,1989794302,4632688741,15234766743,6943817814,5563763370,5049388221,9063149225,3094772600,1782357729,4240549752,195198040,6157373906,5415353496,4428150952,20258241893,14487528393,1450823313,1829418687,12133754817,3311579535,8103886141,21211360293,282181916,3067203,18648081131,14296831,5589783307,2012677718,192210656,12518943408,8596544396,4333773169,1521446434,19374131817,10032387741,4377380247,4283699836,3112525277,1427518040,2132004777,12138487,3002985,4712389943,2327419438,774664520,2110142521,4203217562,206742,18032811240,3648168046] | |
# Script entry: the Instagram login and password are expected as the first
# two command-line arguments.
if len(sys.argv) < 3:
    # Fail with a usage hint instead of an IndexError traceback.
    print("Usage: %s <username> <password>" % sys.argv[0], file=sys.stderr)
    sys.exit(1)
username = sys.argv[1]
password = sys.argv[2]
api = connect_api(username, password)
# Profile-collection workflow, currently disabled:
#accounts = collect_users(api, list_ids)
#pprint(accounts)
#store_accounts_as_csv(accounts, "info_accounts.csv")
followers = collect_followers(api, [1523148838])
pprint(followers)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment