Skip to content

Instantly share code, notes, and snippets.

@S0rryMyBad
Created March 4, 2017 21:21
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save S0rryMyBad/621ac875a24dbd986be00e5fcb41395f to your computer and use it in GitHub Desktop.
Save S0rryMyBad/621ac875a24dbd986be00e5fcb41395f to your computer and use it in GitHub Desktop.
Simple URL grabber for Instagram, written in Python 3
#!/usr/bin/env python3
"""
by - Epsi R Nurwijadi
modified by - Randall Tux
"""
import requests
import json
import time
from lxml import html
from pprint import pprint
import requests
# requests waits indefinitely unless a timeout is passed, so every request
# made below shares this limit (in seconds).
_REQUEST_TIMEOUT = 30
_INSTAGRAM_URL = "https://www.instagram.com/"
_SHARED_DATA_XPATH = '//script[contains(., "_sharedData")]/text()'
_SHARED_DATA_PREFIX = "window._sharedData = "


def _parse_shared_data(script_text):
    """Return the JSON object assigned to ``window._sharedData``.

    Only the first occurrence of the assignment prefix is dropped, so the
    same text appearing later inside a JSON string value is left intact.
    Surrounding whitespace and one trailing ``;`` are removed before decoding.

    Raises:
        ValueError: if the remaining text is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    _, marker, remainder = script_text.partition(_SHARED_DATA_PREFIX)
    # Without the prefix, fall back to decoding the text as it stands.
    payload = (remainder if marker else script_text).strip()
    if payload.endswith(";"):
        payload = payload[:-1]
    return json.loads(payload)


def _fetch_shared_data(page_url):
    """Download *page_url* and return its decoded ``_sharedData`` object.

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status (via ``raise_for_status``).
        LookupError: if no ``<script>`` containing ``_sharedData`` is found.
        ValueError: if the script content cannot be decoded as JSON.
    """
    response = requests.get(page_url, timeout=_REQUEST_TIMEOUT)
    # Fail here on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    scripts = html.fromstring(response.text).xpath(_SHARED_DATA_XPATH)
    if not scripts:
        raise LookupError("no _sharedData script in " + page_url)
    return _parse_shared_data(scripts[0])


def _image_urls(media):
    """Return the single display URL of an image post."""
    return [media["display_src"]]


def _video_urls(media):
    """Return the single video URL of a video post."""
    return [media["video_url"]]


def _sidecar_urls(media):
    """Return the display URL of every child of a sidecar post."""
    edges = media["edge_sidecar_to_children"]["edges"]
    return [edge["node"]["display_url"] for edge in edges]


def _collect_urls(codes, extract):
    """Fetch each post in *codes* and gather the URLs *extract* returns.

    Prints the current time once, then each post URL as it is requested.
    A post that cannot be downloaded or parsed is reported and skipped so
    that the URLs already gathered are not lost.
    """
    print(time.strftime('%X'))
    collected = []
    for code in codes:
        post_url = _INSTAGRAM_URL + "p/" + code + "/"
        print(post_url)
        try:
            shared = _fetch_shared_data(post_url)
            media = shared["entry_data"]["PostPage"][0]["media"]
            collected.extend(extract(media))
        except (requests.RequestException, LookupError, ValueError) as err:
            print("  skipped: {!r}".format(err))
    return collected


user = input('Input username : ').strip()
if not user:
    # An empty name would request the site's front page, which has no profile.
    raise SystemExit("No username given.")

url = _INSTAGRAM_URL + user + "/"
print(url)
print("")

try:
    profile = _fetch_shared_data(url)
    json_nodes = profile["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]
except requests.RequestException as err:
    raise SystemExit("Could not download {}: {}".format(url, err))
except (LookupError, ValueError) as err:
    # Covers a missing script tag, undecodable JSON, or absent keys.
    raise SystemExit(
        "Could not read the media list from {} ({!r})".format(url, err)
    )

images = []
videos = []
sidecars = []

# Sort the post short-codes by their "__typename"; other types are ignored.
codes_by_type = {
    "GraphImage": images,
    "GraphVideo": videos,
    "GraphSidecar": sidecars,
}
for json_node in json_nodes:
    bucket = codes_by_type.get(json_node["__typename"])
    if bucket is not None:
        bucket.append(json_node["code"])

urls_image = _collect_urls(images, _image_urls)
urls_video = _collect_urls(videos, _video_urls)
urls_sidecar = _collect_urls(sidecars, _sidecar_urls)

for heading, found_urls in (
    ("Images", urls_image),
    ("Videos", urls_video),
    ("SideCar", urls_sidecar),
):
    print("")
    print(heading)
    for media_url in found_urls:
        print(media_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment