ricardojoserf/wistia_downloader.py

## wistia_downloader.py
# Script to download videos hosted on Wistia: right-click the video, choose “Copy link and thumbnail”, and pass the copied HTML as the 1st parameter of this script.
# The 2nd parameter (the output file name) is optional. Working as of March 2022; it downloads the higher-quality (HD) version of the video when one is available.
# Syntax:
# python3 wistia_downloader.py 'copied info with right click' 'file name'
# Example with the video from https://wistia.com/:
# python3 wistia_downloader.py '<p><a href="https://wistia.com?wvideo=vhkqhqhzyq"><img src="https://embedwistia-a.akamaihd.net/deliveries/48f1d62d1ceddb4284ad9cf67c916235.jpg?image_play_button_size=2x&amp;image_crop_resized=960x540&amp;image_play_button=1&amp;image_play_button_color=fa4fa0e0" width="400" height="225" style="width: 400px; height: 225px;"></a></p><p><a href="https://wistia.com?wvideo=vhkqhqhzyq">The video hosting platform made for B2B marketers | Wistia</a></p>' "test.mp4"

import requests
import json
import bs4
import sys
import re
import os

# Embed page that carries the media metadata for a given video id.
IFRAME_URL = "https://fast.wistia.net/embed/iframe/%s?videoFoam=true"
# JavaScript call whose first argument is the JSON metadata object.
INIT_MARKER = "W.iframeInit("
# Seconds to wait for the embed page before giving up.
REQUEST_TIMEOUT = 30


def video_id_from_href(href):
    """Return the Wistia video id carried by a "Copy link and thumbnail" href.

    The value of the ``wvideo`` query parameter is used wherever it appears
    in the URL. When there is no such parameter, the text between the first
    and second "=" is returned, as the script originally did.

    Raises:
        ValueError: if ``href`` is empty or holds no id.
    """
    href = href or ""
    found = re.search(r"[?&]wvideo=([^&#]+)", href)
    if found:
        return found.group(1)
    pieces = href.split("=")
    if len(pieces) < 2 or not pieces[1]:
        raise ValueError("No Wistia video id found in link: %r" % href)
    return pieces[1]


def extract_video_id(copied_info):
    """Return the video id found in the first link of the pasted HTML.

    Raises:
        ValueError: if the HTML has no ``<a href=...>`` or the link has no id.
    """
    soup = bs4.BeautifulSoup(copied_info, "lxml")
    anchor = soup.find("a", href=True)
    if anchor is None:
        raise ValueError("No link (<a href=...>) found in the copied info")
    return video_id_from_href(anchor["href"])


def parse_iframe_init(script_text):
    """Return the first argument of ``W.iframeInit(...)`` as a dict.

    Returns None when ``script_text`` has no such call or when its first
    argument is not a JSON object.
    """
    start = script_text.find(INIT_MARKER)
    if start < 0:
        return None
    start += len(INIT_MARKER)
    # raw_decode() does not skip leading whitespace on its own.
    while start < len(script_text) and script_text[start].isspace():
        start += 1
    try:
        # Decode exactly one JSON value and ignore whatever follows it, so
        # the shape of the call's remaining arguments does not matter.
        data, _ = json.JSONDecoder().raw_decode(script_text, start)
    except ValueError:
        return None
    return data if isinstance(data, dict) else None


def fetch_media_data(video_id):
    """Download the embed page of ``video_id`` and return its metadata dict.

    Raises:
        requests.RequestException: on network failure or HTTP error status.
        ValueError: if no script on the page holds parseable metadata.
    """
    fast_url = IFRAME_URL % video_id
    print("Fast url: %s" % fast_url)

    response = requests.get(fast_url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    page = bs4.BeautifulSoup(response.content, "lxml")
    for script in page.find_all("script"):
        for chunk in script:
            data = parse_iframe_init(str(chunk))
            if data is not None:
                return data
    raise ValueError("No W.iframeInit(...) metadata found at %s" % fast_url)


def assets_of_type(assets, asset_type):
    """Return the entries of ``assets`` whose "type" equals ``asset_type``.

    ``assets`` may be None, and entries without a "type" key are skipped.
    """
    return [
        asset
        for asset in assets or []
        if isinstance(asset, dict) and asset.get("type") == asset_type
    ]


def bin_file_name(bin_url):
    """Return the last path component of ``bin_url`` (query string dropped)."""
    path = bin_url.split("?", 1)[0].rstrip("/")
    return path.rsplit("/", 1)[-1] or "video.bin"


def output_name(bin_name, requested=None):
    """Return the final file name for the downloaded video.

    Uses ``requested`` when it is given and non-empty, otherwise ``bin_name``
    with ".bin" replaced by ".mp4". Any "/" is replaced by "-" so the file is
    always created in the current directory.
    """
    name = requested if requested else bin_name.replace(".bin", ".mp4")
    return name.replace("/", "-")


def download(bin_url, destination):
    """Fetch ``bin_url`` into ``destination`` with wget; return True on success."""
    # Imported here because the file's top-level imports do not include it.
    import subprocess

    # The URL comes from a remote page, so pass an argument list (no shell)
    # and end option parsing with "--" before it.
    command = ["wget", "-q", "-O", destination, "--", bin_url]
    try:
        subprocess.run(command, check=True)
    except FileNotFoundError:
        print("wget is not installed or not in PATH")
        return False
    except subprocess.CalledProcessError as error:
        print("wget failed with exit code %d" % error.returncode)
        return False
    return True


def main(argv=None):
    """Download the video described by ``argv[1]``; return the exit status.

    ``argv`` defaults to ``sys.argv``. ``argv[1]`` is the pasted
    "Copy link and thumbnail" HTML and the optional ``argv[2]`` is the output
    file name. Returns 0 on success, 2 on bad usage and 1 on any other
    failure, including the case where no video is found.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("Usage: python3 wistia_downloader.py 'copied info with right click' ['file name']")
        return 2

    try:
        video_id = extract_video_id(argv[1])
        data = fetch_media_data(video_id)
    except (ValueError, requests.RequestException) as error:
        print("Error: %s" % error)
        return 1

    assets = data.get("assets")
    videos = assets_of_type(assets, "hd_mp4_video")
    if not videos:
        print("No HD videos found (type hd_mp4_video). Searching regular videos (type md_mp4_video)")
        videos = assets_of_type(assets, "md_mp4_video")
    if not videos:
        print("No HD or regular videos found, sorry bye")
        return 1

    # Same choice as before: the last asset of the preferred type.
    bin_url = videos[-1].get("url")
    if not bin_url:
        print("Error: the selected asset has no url")
        return 1
    print(".bin url: %s" % bin_url)

    bin_name = bin_file_name(bin_url)
    new_name = output_name(bin_name, argv[2] if len(argv) > 2 else None)

    print("Downloading %s"%(bin_name))
    if not download(bin_url, bin_name):
        return 1

    print("Renaming %s to %s"%(bin_name,new_name))
    os.rename(bin_name, new_name)
    return 0


if __name__ == "__main__":
    sys.exit(main())
	# Script to download videos hosted on Wistia: right-click the video, choose “Copy link and thumbnail”, and pass the copied HTML as the 1st parameter of this script.
	# The 2nd parameter (the output file name) is optional. Working as of March 2022; it downloads the higher-quality (HD) version of the video when one is available.
	# Syntax:
	# python3 wistia_downloader.py 'copied info with right click' 'file name'
	# Example with the video from https://wistia.com/:
	# python3 wistia_downloader.py '<p><a href="https://wistia.com?wvideo=vhkqhqhzyq"><img src="https://embedwistia-a.akamaihd.net/deliveries/48f1d62d1ceddb4284ad9cf67c916235.jpg?image_play_button_size=2x&image_crop_resized=960x540&image_play_button=1&image_play_button_color=fa4fa0e0" width="400" height="225" style="width: 400px; height: 225px;"></a></p><p><a href="https://wistia.com?wvideo=vhkqhqhzyq">The video hosting platform made for B2B marketers | Wistia</a></p>' "test.mp4"

	import requests
	import json
	import bs4
	import sys
	import re
	import os

	# Embed page that carries the media metadata for a given video id.
	IFRAME_URL = "https://fast.wistia.net/embed/iframe/%s?videoFoam=true"
	# JavaScript call whose first argument is the JSON metadata object.
	INIT_MARKER = "W.iframeInit("
	# Seconds to wait for the embed page before giving up.
	REQUEST_TIMEOUT = 30


	def video_id_from_href(href):
	    """Return the Wistia video id carried by a "Copy link and thumbnail" href.

	    The value of the ``wvideo`` query parameter is used wherever it appears
	    in the URL. When there is no such parameter, the text between the first
	    and second "=" is returned, as the script originally did.

	    Raises:
	        ValueError: if ``href`` is empty or holds no id.
	    """
	    href = href or ""
	    found = re.search(r"[?&]wvideo=([^&#]+)", href)
	    if found:
	        return found.group(1)
	    pieces = href.split("=")
	    if len(pieces) < 2 or not pieces[1]:
	        raise ValueError("No Wistia video id found in link: %r" % href)
	    return pieces[1]


	def extract_video_id(copied_info):
	    """Return the video id found in the first link of the pasted HTML.

	    Raises:
	        ValueError: if the HTML has no ``<a href=...>`` or the link has no id.
	    """
	    soup = bs4.BeautifulSoup(copied_info, "lxml")
	    anchor = soup.find("a", href=True)
	    if anchor is None:
	        raise ValueError("No link (<a href=...>) found in the copied info")
	    return video_id_from_href(anchor["href"])


	def parse_iframe_init(script_text):
	    """Return the first argument of ``W.iframeInit(...)`` as a dict.

	    Returns None when ``script_text`` has no such call or when its first
	    argument is not a JSON object.
	    """
	    start = script_text.find(INIT_MARKER)
	    if start < 0:
	        return None
	    start += len(INIT_MARKER)
	    # raw_decode() does not skip leading whitespace on its own.
	    while start < len(script_text) and script_text[start].isspace():
	        start += 1
	    try:
	        # Decode exactly one JSON value and ignore whatever follows it, so
	        # the shape of the call's remaining arguments does not matter.
	        data, _ = json.JSONDecoder().raw_decode(script_text, start)
	    except ValueError:
	        return None
	    return data if isinstance(data, dict) else None


	def fetch_media_data(video_id):
	    """Download the embed page of ``video_id`` and return its metadata dict.

	    Raises:
	        requests.RequestException: on network failure or HTTP error status.
	        ValueError: if no script on the page holds parseable metadata.
	    """
	    fast_url = IFRAME_URL % video_id
	    print("Fast url: %s" % fast_url)

	    response = requests.get(fast_url, timeout=REQUEST_TIMEOUT)
	    response.raise_for_status()
	    page = bs4.BeautifulSoup(response.content, "lxml")
	    for script in page.find_all("script"):
	        for chunk in script:
	            data = parse_iframe_init(str(chunk))
	            if data is not None:
	                return data
	    raise ValueError("No W.iframeInit(...) metadata found at %s" % fast_url)


	def assets_of_type(assets, asset_type):
	    """Return the entries of ``assets`` whose "type" equals ``asset_type``.

	    ``assets`` may be None, and entries without a "type" key are skipped.
	    """
	    return [
	        asset
	        for asset in assets or []
	        if isinstance(asset, dict) and asset.get("type") == asset_type
	    ]


	def bin_file_name(bin_url):
	    """Return the last path component of ``bin_url`` (query string dropped)."""
	    path = bin_url.split("?", 1)[0].rstrip("/")
	    return path.rsplit("/", 1)[-1] or "video.bin"


	def output_name(bin_name, requested=None):
	    """Return the final file name for the downloaded video.

	    Uses ``requested`` when it is given and non-empty, otherwise ``bin_name``
	    with ".bin" replaced by ".mp4". Any "/" is replaced by "-" so the file is
	    always created in the current directory.
	    """
	    name = requested if requested else bin_name.replace(".bin", ".mp4")
	    return name.replace("/", "-")


	def download(bin_url, destination):
	    """Fetch ``bin_url`` into ``destination`` with wget; return True on success."""
	    # Imported here because the file's top-level imports do not include it.
	    import subprocess

	    # The URL comes from a remote page, so pass an argument list (no shell)
	    # and end option parsing with "--" before it.
	    command = ["wget", "-q", "-O", destination, "--", bin_url]
	    try:
	        subprocess.run(command, check=True)
	    except FileNotFoundError:
	        print("wget is not installed or not in PATH")
	        return False
	    except subprocess.CalledProcessError as error:
	        print("wget failed with exit code %d" % error.returncode)
	        return False
	    return True


	def main(argv=None):
	    """Download the video described by ``argv[1]``; return the exit status.

	    ``argv`` defaults to ``sys.argv``. ``argv[1]`` is the pasted
	    "Copy link and thumbnail" HTML and the optional ``argv[2]`` is the output
	    file name. Returns 0 on success, 2 on bad usage and 1 on any other
	    failure, including the case where no video is found.
	    """
	    argv = sys.argv if argv is None else argv
	    if len(argv) < 2:
	        print("Usage: python3 wistia_downloader.py 'copied info with right click' ['file name']")
	        return 2

	    try:
	        video_id = extract_video_id(argv[1])
	        data = fetch_media_data(video_id)
	    except (ValueError, requests.RequestException) as error:
	        print("Error: %s" % error)
	        return 1

	    assets = data.get("assets")
	    videos = assets_of_type(assets, "hd_mp4_video")
	    if not videos:
	        print("No HD videos found (type hd_mp4_video). Searching regular videos (type md_mp4_video)")
	        videos = assets_of_type(assets, "md_mp4_video")
	    if not videos:
	        print("No HD or regular videos found, sorry bye")
	        return 1

	    # Same choice as before: the last asset of the preferred type.
	    bin_url = videos[-1].get("url")
	    if not bin_url:
	        print("Error: the selected asset has no url")
	        return 1
	    print(".bin url: %s" % bin_url)

	    bin_name = bin_file_name(bin_url)
	    new_name = output_name(bin_name, argv[2] if len(argv) > 2 else None)

	    print("Downloading %s"%(bin_name))
	    if not download(bin_url, bin_name):
	        return 1

	    print("Renaming %s to %s"%(bin_name,new_name))
	    os.rename(bin_name, new_name)
	    return 0


	if __name__ == "__main__":
	    sys.exit(main())