Skip to content

Instantly share code, notes, and snippets.

@nelsonsequiera
Created October 30, 2020 18:25
Show Gist options
  • Save nelsonsequiera/bd95ed820e76eb2360dbd258708024f9 to your computer and use it in GitHub Desktop.
Save nelsonsequiera/bd95ed820e76eb2360dbd258708024f9 to your computer and use it in GitHub Desktop.
import requests
import json
import time
import csv
import pandas as pd
# step 1: manually collect the followers' usernames from the HTML of the Instagram followers page and save them to followers_usernames.txt, one username per line (no quotes, no commas).
def get_usernames(path='followers_usernames.txt'):
    """Return the usernames listed in *path*, one per line.

    Surrounding whitespace is stripped from every line. Blank lines are
    skipped so that callers never receive an empty username (which would
    otherwise be turned into a request for the site root).

    Args:
        path: Text file holding one username per line. Defaults to
            ``followers_usernames.txt`` in the current working directory.

    Returns:
        A list of non-empty username strings in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(path) as f:
        stripped = (line.strip() for line in f)
        return [username for username in stripped if username]
# step 2: get followers data
# Request each follower's profile JSON and store it in followers_raw_data.txt
# as one JSON document per line. The file is opened in append mode, so
# records written by earlier runs are kept.
with open('followers_raw_data.txt', 'a') as f:
    for i, username in enumerate(get_usernames()):
        print(f'{i} - getting data for {username}')
        url = f"https://www.instagram.com/{username}/?__a=1"
        try:
            # Without a timeout a single stalled connection blocks forever.
            response = requests.get(url, timeout=30)
        except requests.RequestException as e:
            print(e)
            # Long back-off before touching the site again (presumably to
            # wait out rate limiting -- TODO confirm).
            time.sleep(60 * 60)
            # There is no response for this user. Falling through would
            # either hit an undefined name or re-write the previous user's
            # response, so move on to the next username instead.
            continue
        try:
            data = response.json()
        except ValueError as e:
            # The body was not valid JSON; report it and keep going.
            print(e)
        else:
            # Serialize first so only complete lines reach the file.
            f.write(json.dumps(data) + '\n')
        # Short pause between consecutive requests.
        time.sleep(1)
# step 3: remove unwanted data
# Keys dropped from every user record before export.
UNWANTED_KEYS = (
    'edge_felix_video_timeline',
    'edge_owner_to_timeline_media',
    'edge_saved_media',
    'edge_media_collections',
    'edge_related_profiles',
)
# Keys whose value is a nested object; only its 'count' field is kept.
COUNT_KEYS = ('edge_followed_by', 'edge_follow', 'edge_mutual_followed_by')

# NOTE: the list keeps the name `re` because step 4 iterates over it.
re = []
with open('followers_raw_data.txt', 'r') as f:
    for each in f:
        if not each.strip():
            # Tolerate blank lines in the raw file.
            continue
        data = json.loads(each)
        try:
            # Index the parsed document, not the raw text line: subscripting
            # the line (a str) with a key raises TypeError.
            ss = data['graphql']['user']
        except (KeyError, TypeError):
            # Step 2 stores whatever JSON came back, so a record may lack
            # the expected profile payload; skip it rather than abort.
            print(f'skipping record without graphql user data: {each.strip()[:80]}')
            continue
        for key in UNWANTED_KEYS:
            # Default of None: a record that lacks the key is not an error.
            ss.pop(key, None)
        for key in COUNT_KEYS:
            # Flatten {'count': n, ...} down to n.
            ss[key] = ss[key]['count']
        re.append(ss)
# step 4: save the data to csv file
# Columns are every key seen in any record, in first-seen order, so a record
# with extra or missing keys cannot shift values under the wrong header.
fieldnames = list(dict.fromkeys(key for emp in re for key in emp))
# newline='' lets the csv module control line endings (otherwise blank rows
# appear on Windows); an explicit encoding keeps non-ASCII profile text from
# failing on platforms whose default encoding is not UTF-8.
with open('followers_data.csv', 'w', newline='', encoding='utf-8') as f:
    if fieldnames:
        # Records missing a column get an empty cell.
        csv_writer = csv.DictWriter(f, fieldnames=fieldnames, restval='')
        csv_writer.writeheader()
        csv_writer.writerows(re)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment