Skip to content

Instantly share code, notes, and snippets.

@ketankr9
Last active May 10, 2018 06:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ketankr9/dcdcc8dd23406ef8962c5a53cab7e300 to your computer and use it in GitHub Desktop.
Save ketankr9/dcdcc8dd23406ef8962c5a53cab7e300 to your computer and use it in GitHub Desktop.
A Python script that scrapes the image links of both the "Most Recent" and the "Top Posts" associated with any Instagram hashtag.
import requests, json
class HashTag(object):
    """Collect image links for an Instagram hashtag.

    The tag's explore page is requested as JSON and the ``display_url`` of
    every post in the selected section is appended to ``links``.

    Attributes:
        tag: the hashtag being scraped (without the leading ``#``).
        tagType: section used when ``scrape`` is called without an argument.
        data: parsed JSON of the last response, or None before any request.
        links: list of image URLs collected so far.
        url: URL of the last request, or "" before any request.
        status: HTTP status code of the last response (404 until a response
            has been received).
    """

    # Seconds to wait for the server before a request attempt is abandoned;
    # without a timeout ``requests.get`` may block indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, tag, tagType="top_posts"):
        """Store the settings and immediately scrape the tag.

        Args:
            tag: the tag you are concerned with scraping.
            tagType: "top_posts" or "media" (see ``scrape``).
        """
        self.tag = tag
        self.data = None
        self.links = []
        self.url = ""
        self.tagType = tagType
        self.status = 404
        self.scrape()

    def __get_request(self, retry=3):
        """Download the raw JSON text for the tag.

        Args:
            retry: maximum number of attempts.

        Returns:
            The response body as text.

        Raises:
            requests.RequestException: the error of the last attempt when
                every attempt failed (connection error, timeout or an HTTP
                error status).
            ValueError: if ``retry`` allows no attempt at all.
        """
        url = "https://www.instagram.com/explore/tags/" + self.tag + "/?__a=1"
        self.url = url
        # Reset so that a repeated scrape() performs a fresh request instead
        # of relying on the status left behind by an earlier call.
        self.status = 404
        last_error = None
        for _ in range(retry):
            try:
                response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
                self.status = response.status_code
                response.raise_for_status()
            except requests.RequestException as err:
                last_error = err
                continue
            return response.text
        if last_error is not None:
            raise last_error
        raise ValueError("retry must be a positive integer")

    def __clean_data(self, rawData):
        """Parse the raw JSON text and keep the result in ``self.data``."""
        self.data = json.loads(rawData)

    def __extract_tag_links(self, tagType):
        """Append the image URL of every post in the ``tagType`` section.

        Raises:
            KeyError: if the parsed data has no such section.
        """
        section = self.data["graphql"]["hashtag"]["edge_hashtag_to_" + tagType]
        self.links.extend(
            post["node"]["display_url"] for post in section["edges"]
        )

    def scrape(self, tagType=None):
        """Fetch the tag page and append the links of one section to ``links``.

        Links accumulate across calls; ``links`` is never cleared here.

        Args:
            tagType: which section to extract. Can take two values:
                "top_posts" := links of the top posts associated with the tag
                "media"     := links of the most recent posts associated with
                               the tag
                When omitted, the type given to the constructor is used.
        """
        if tagType is None:
            tagType = self.tagType
        rawData = self.__get_request()
        self.__clean_data(rawData)
        self.__extract_tag_links(tagType)
# Example usage: collect and print the image links of the top posts tagged
# "35mm". Constructing HashTag performs the network request.
T = HashTag("35mm", "top_posts")
# Parenthesised single-argument print works on both Python 2 and Python 3.
print(T.links)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment