Skip to content

Instantly share code, notes, and snippets.

@foresmac
Last active December 28, 2021 08:09
Show Gist options
  • Save foresmac/8059013 to your computer and use it in GitHub Desktop.
Save foresmac/8059013 to your computer and use it in GitHub Desktop.
Download a particular hashtag from Instagram. This script will download all the images with a particular hashtag from Instagram. It's not very clever, and will try to suck down every image it can find; if you use a particularly popular tag, it will take a **long time** to run. Simply run it from the command line like so: `./get_instagram_tag.py <ta…
#! /usr/bin/python
## get_instagram_tag
## by Chris Foresman
## @foresmac
##
## Instagram API: http://instagram.com/developer/
## Requires the requests Python library
## (sudo) pip install requests
from __future__ import print_function
import requests
import json
import sys
import os
ACCESS_TOKEN = os.getenv('INSTAGRAM_ACCESS_TOKEN', '')
def _fetch_json(url):
    """Fetch *url* with ``requests`` and return the decoded JSON body."""
    return json.loads(requests.get(url).content)


def get_image_urls(start_url, fetch=None):
    """Collect image and video URLs from a paginated API listing.

    Starting at *start_url*, every page is fetched and its ``data`` list is
    scanned: items whose ``type`` is ``'image'`` contribute
    ``images.standard_resolution.url`` and items whose ``type`` is
    ``'video'`` contribute ``videos.standard_resolution.url``.  Items of any
    other type are ignored.  Paging follows ``pagination.next_url`` until a
    page has no such entry (or it is empty).

    Args:
        start_url: URL of the first page to request.
        fetch: Optional callable taking a URL and returning the decoded JSON
            page as a dict.  Defaults to an HTTP GET via ``requests``.

    Returns:
        A tuple ``(img_urls, vid_urls)`` of lists.  Each list is reversed
        relative to the order in which the URLs were encountered.

    Raises:
        KeyError: If a page has no ``data`` key, or an image/video item lacks
            the expected ``standard_resolution`` URL.
    """
    if fetch is None:
        fetch = _fetch_json

    img_urls = []
    vid_urls = []
    url = start_url

    # The first page is always requested; later pages only while the API
    # keeps handing back a non-empty next_url.
    while True:
        page = fetch(url)

        for item in page['data']:
            media_type = item['type']
            if media_type == 'image':
                img_urls.append(item['images']['standard_resolution']['url'])
            elif media_type == 'video':
                vid_urls.append(item['videos']['standard_resolution']['url'])

        # A missing key, or a 'pagination' value that is not a mapping
        # (e.g. None), both mean there is no further page.  Anything else
        # (Ctrl-C, network errors, ...) is deliberately allowed to propagate.
        try:
            url = page['pagination']['next_url']
        except (KeyError, TypeError):
            url = ''

        if not url:
            break

    print('\n\nGot {0} URLs for images from Instagram API.'.format(
        len(img_urls)))
    print('Got {0} URLs for videos from Instagram API.\n'.format(
        len(vid_urls)))

    img_urls.reverse()
    vid_urls.reverse()
    return img_urls, vid_urls
def download_file_from_url(url, filename):
    """Download *url* and write the response body to *filename*.

    The request is made and its HTTP status checked *before* the destination
    file is opened, so a failed request neither creates an empty file nor
    saves an error page under a media file name.  The body is streamed to
    disk in chunks rather than held in memory all at once.

    Args:
        url: Address of the file to download.
        filename: Path of the local file to create or overwrite.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            server answers with a 4xx/5xx status.
        IOError: If *filename* cannot be opened or written.
    """
    response = requests.get(url, stream=True)
    try:
        response.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    finally:
        # Explicit close returns the connection to the pool even when the
        # status check or the write fails part-way through.
        response.close()
def _ensure_dir(path):
    """Create *path* (including missing parents) unless it already exists."""
    if not os.path.exists(path):
        os.makedirs(path)


def _download_numbered(urls, dest_dir, hashtag, extension):
    """Download *urls* into *dest_dir* as ``<hashtag>-NNN.<extension>``.

    Prints ``.`` for every successful download and ``X`` for every failure.
    A failed download does not consume a number, so the next URL reuses it.

    Returns:
        The number of files downloaded successfully.
    """
    saved = 0
    for url in urls:
        filename = '{0}-{1:03d}.{2}'.format(hashtag, saved + 1, extension)
        full_filename = os.path.join(dest_dir, filename)
        try:
            download_file_from_url(url, full_filename)
        except Exception:
            # Best effort: note the failure and carry on.  ``Exception``
            # (not a bare except) so Ctrl-C still aborts the run.
            print('X', end='')
        else:
            saved += 1
            print('.', end='')
        sys.stdout.flush()
    return saved


def main(argv=None):
    """Download every image and video tagged with the hashtag in *argv*.

    Args:
        argv: Argument list in ``sys.argv`` layout: program name, hashtag,
            and an optional destination folder.  Defaults to ``sys.argv``.
            When the folder is omitted or empty, the current working
            directory is used.

    Returns:
        Process exit status: ``2`` when the hashtag is missing, ``1`` when
        no access token is configured, ``0`` otherwise.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print('Usage: get_instagram_tag.py <tag> <path_to_folder>')
        return 2

    if not ACCESS_TOKEN:
        print(
            'You need to store your Instagram API access token in an '
            'environment variable named INSTAGRAM_ACCESS_TOKEN.')
        return 1

    hashtag = argv[1]
    # The folder argument is optional; indexing argv[2] unconditionally
    # would raise IndexError when only the tag is supplied.
    folder_arg = argv[2] if len(argv) > 2 else ''
    base_filepath = (
        os.path.expanduser(folder_arg) if folder_arg else os.getcwd())

    start_url_string = (
        'https://api.instagram.com/v1/tags/{0}/media/recent?access_token={1}')
    start_url = start_url_string.format(hashtag, ACCESS_TOKEN)
    img_urls, vid_urls = get_image_urls(start_url)

    _ensure_dir(base_filepath)

    if img_urls:
        img_number = _download_numbered(
            img_urls, base_filepath, hashtag, 'jpg')
        print('\n\nDownloaded {0} images tagged #{1}\n'.format(
            img_number, hashtag))

    if vid_urls:
        # Videos go into their own sub-folder of the destination.
        video_dir = os.path.join(base_filepath, 'videos')
        _ensure_dir(video_dir)
        vid_number = _download_numbered(vid_urls, video_dir, hashtag, 'mp4')
        print('\n\nDownloaded {0} videos tagged #{1}'.format(
            vid_number, hashtag))

    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment