Skip to content

Instantly share code, notes, and snippets.

@p4p1
Last active September 18, 2021 04:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save p4p1/890706f16a398543559661c8c4b9d520 to your computer and use it in GitHub Desktop.
Save p4p1/890706f16a398543559661c8c4b9d520 to your computer and use it in GitHub Desktop.
📀📀📀📀
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Made by papi
# har_content_extractor.py
# Requirements:
# json, requests(python3), urllib(python2)
# Description:
# Dump images and videos from .har files.
# The program targets Python 3 by default. To run it under Python 2, uncomment
# the lines marked "For python2" and comment out the lines marked "For python3".
# autoclicker command: xdotool click --repeat 500 --delay 200 1
# Usage:
# ./har_content_extractor.py file_name.har file_name2.har ...
# Folder Structure:
# ├── images
# │   ├── 0.jpg
# │   └── 1.jpg
# └── videos
#     ├── 0.mp4
#     └── 1.mp4
#import urllib # For python2
import requests # For python3
import sys, json, os
# Running counters used to name the downloaded files. They live at module
# level so numbering continues across every .har file handled in one run.
inc_vid = 0  # index of the next file written as videos/<n>.mp4
inc_im = 0  # index of the next file written as images/<n>.jpg
def _save_url(url, dest):
    """Download ``url`` (following redirects) and write the body to ``dest``."""
    #urllib.urlretrieve(url, dest) # For python2
    # A timeout keeps one stalled server from hanging the whole run.
    response = requests.get(url, allow_redirects=True, timeout=30) # For python3
    # "with" closes the output file even if the write fails part-way.
    with open(dest, 'wb') as out:
        out.write(response.content)


def dump_data(file):
    """Download every MP4 video and JPEG image referenced by a .har file.

    The HAR file is parsed as JSON and each entry's response headers are
    scanned for a ``content-type`` header. Entries whose media type is
    ``video/mp4`` are saved as ``videos/<n>.mp4`` and entries whose media
    type is ``image/jpeg`` are saved as ``images/<n>.jpg``, where ``<n>``
    comes from the module-level counters ``inc_vid`` / ``inc_im`` (which
    this function increments).

    The header name is compared case-insensitively and any parameters after
    ``;`` in the header value are ignored, so ``Content-Type`` and
    ``image/jpeg; charset=binary`` are matched as well.

    Args:
        file: Path of the .har file to read.

    Raises:
        Whatever ``open``, ``json.load`` or the download itself raises;
        nothing is swallowed here.
    """
    global inc_vid
    global inc_im

    # Create each output directory independently: one of them may already
    # exist while the other is missing.
    for directory in ("./videos", "./images"):
        if not os.path.isdir(directory):
            os.mkdir(directory)

    with open(file, "r") as fp:
        data = json.load(fp)

    for entrie in data["log"]["entries"]:
        for headers in entrie["response"]["headers"]:
            if headers["name"].lower() != "content-type":
                continue
            # Drop parameters such as "; charset=..." before comparing.
            mime = headers["value"].split(";")[0].strip().lower()
            if mime == "video/mp4":
                url = entrie["request"]["url"]
                print(url + " (" + str(inc_vid) + "vid)")
                _save_url(url, "videos/%d.mp4" % inc_vid)
                inc_vid += 1
            elif mime == "image/jpeg":
                url = entrie["request"]["url"]
                print(url + " (" + str(inc_im) + "img)")
                _save_url(url, "images/%d.jpg" % inc_im)
                inc_im += 1
def _print_usage(prog):
    """Print the command-line help text, using ``prog`` as the program name."""
    print("Usage:")
    print("\t%s file_name.har file_name2.har ...\n" % prog)
    print("Program made by p4p1 to download pictures and videos from multiple .har files.")
    print("The program will create 2 directories named videos/ and images/ for the dumped content.")
    print("Each file is named incrementally")


def main(argv):
    """Process the command line.

    Every argument after the program name that contains ``.har`` is passed
    to ``dump_data``; any other argument prints the usage text. The usage
    text is also printed when no arguments are given at all.

    Args:
        argv: Full argument vector, program name first (e.g. ``sys.argv``).
    """
    prog = argv[0] if argv else "har_content_extractor.py"
    # Skip the program name by position, so a script path that happens to
    # contain ".har" is never mistaken for an input file.
    args = argv[1:]
    if not args:
        _print_usage(prog)
        return
    for arg in args:
        if ".har" in arg:
            dump_data(arg)
        else:
            _print_usage(prog)


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment