Created
March 4, 2017 21:21
-
-
Save S0rryMyBad/621ac875a24dbd986be00e5fcb41395f to your computer and use it in GitHub Desktop.
Simple URL grabber for instagram written in python3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
Simple URL grabber for Instagram.

by - Epsi R Nurwijadi
modified by - Randall Tux
"""
import requests | |
import json | |
import time | |
from lxml import html | |
from pprint import pprint | |
import requests | |
# Base address shared by the profile page and the individual post pages.
INSTAGRAM_URL = "https://www.instagram.com/"
# JavaScript assignment that precedes the JSON payload in the page's <script>.
SHARED_DATA_PREFIX = "window._sharedData = "
# Seconds to wait on a request before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def parse_shared_data(script_text):
    """Decode the JSON value assigned to ``window._sharedData``.

    Args:
        script_text: Text of the ``<script>`` element, expected to look like
            ``window._sharedData = {...};`` (surrounding whitespace allowed).

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If what remains after removing the assignment prefix and
            the trailing semicolon is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    payload = script_text.strip()
    if payload.startswith(SHARED_DATA_PREFIX):
        payload = payload[len(SHARED_DATA_PREFIX):]
    # Drop only the statement terminator at the very end.  A global
    # search-and-replace would also rewrite matching text inside JSON string
    # values (for example a caption) and silently corrupt them.
    if payload.endswith(";"):
        payload = payload[:-1]
    return json.loads(payload)


def fetch_shared_data(url):
    """Download *url* and return the ``_sharedData`` object embedded in it.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded JSON value taken from the first ``<script>`` element
        whose text contains ``_sharedData``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If no such ``<script>`` element exists or its payload is
            not valid JSON.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail here with the HTTP status rather than later with an unrelated
    # lookup error on an error page.
    response.raise_for_status()
    page = html.fromstring(response.text)
    scripts = page.xpath('//script[contains(., "_sharedData")]/text()')
    if not scripts:
        raise ValueError("no _sharedData script found at " + url)
    return parse_shared_data(scripts[0])


def group_codes_by_type(nodes):
    """Sort post shortcodes into buckets keyed by their ``__typename``.

    Args:
        nodes: Iterable of dicts, each with a ``"__typename"`` and a
            ``"code"`` key.

    Returns:
        A dict with the keys ``"GraphImage"``, ``"GraphVideo"`` and
        ``"GraphSidecar"``, each mapping to the list of codes of that type
        in input order.  Nodes of any other type are ignored.
    """
    groups = {"GraphImage": [], "GraphVideo": [], "GraphSidecar": []}
    for node in nodes:
        typename = node["__typename"]
        code = node["code"]
        if typename in groups:
            groups[typename].append(code)
    return groups


def post_url(code):
    """Return the address of the post page for shortcode *code*."""
    return INSTAGRAM_URL + "p/" + code + "/"


def fetch_post_media(code):
    """Print the post address for *code* and return the post's media dict.

    The dict is read from ``entry_data -> PostPage[0] -> media`` of the
    page's shared data.
    """
    url = post_url(code)
    print(url)
    return fetch_shared_data(url)["entry_data"]["PostPage"][0]["media"]


def print_section(title, urls):
    """Print a blank line, then *title*, then every entry of *urls*."""
    print("")
    print(title)
    for url in urls:
        print(url)


def main():
    """Ask for a username and list the media URLs of the profile's posts.

    Reads the post list embedded in the profile page, visits every image,
    video and sidecar post in that order (printing a timestamp before each
    group and each post address as it is fetched), and finally prints the
    collected URLs grouped under "Images", "Videos" and "SideCar".
    """
    user = input('Input username : ').strip()
    if not user:
        raise SystemExit("No username given.")

    url = INSTAGRAM_URL + user + "/"
    print(url)
    print("")

    profile = fetch_shared_data(url)
    nodes = profile["entry_data"]["ProfilePage"][0]["user"]["media"]["nodes"]
    codes = group_codes_by_type(nodes)

    print(time.strftime('%X'))
    urls_image = [
        fetch_post_media(code)["display_src"] for code in codes["GraphImage"]
    ]

    print(time.strftime('%X'))
    urls_video = [
        fetch_post_media(code)["video_url"] for code in codes["GraphVideo"]
    ]

    print(time.strftime('%X'))
    urls_sidecar = []
    for code in codes["GraphSidecar"]:
        media = fetch_post_media(code)
        # A sidecar post bundles several children; collect each child's URL.
        for edge in media["edge_sidecar_to_children"]["edges"]:
            urls_sidecar.append(edge["node"]["display_url"])

    print_section("Images", urls_image)
    print_section("Videos", urls_video)
    print_section("SideCar", urls_sidecar)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment