Skip to content

Instantly share code, notes, and snippets.

@adrianjguerrero
Created March 11, 2021 04:48
Show Gist options
  • Save adrianjguerrero/aaf251c0a457eb8d787dbfac97bc9856 to your computer and use it in GitHub Desktop.
Save adrianjguerrero/aaf251c0a457eb8d787dbfac97bc9856 to your computer and use it in GitHub Desktop.
import re
import os
import urllib.parse
import urllib.request
from tqdm import tqdm
# pip install tqdm
class DownloadProgressBar(tqdm):
    """tqdm progress bar whose ``update_to`` can be used as a ``urlretrieve`` reporthook."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to the absolute position ``b * bsize``.

        Args:
            b: Number of blocks transferred so far.
            bsize: Size of each block, in the bar's unit.
            tsize: Total size; when not ``None`` it replaces ``self.total``.
        """
        if tsize is not None:
            self.total = tsize
        # The hook receives a running total, while tqdm.update() expects an
        # increment, so subtract what the bar has already counted (self.n).
        transferred = b * bsize
        increment = transferred - self.n
        self.update(increment)
def download_url(url, output_path, str_progress):
    """Download ``url`` to ``output_path`` while showing a progress bar.

    The data is first written to ``output_path + ".part"`` and only renamed
    to ``output_path`` once the transfer has finished. An interrupted or
    failed download therefore never leaves a truncated file at
    ``output_path`` that a later "already exists" check would mistake for a
    complete one.

    Args:
        url: Address of the file to fetch.
        output_path: Destination file path; its directory must already exist.
        str_progress: Label shown next to the progress bar.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises (e.g. ``URLError``,
        ``ContentTooShortError``); in that case ``output_path`` is not created.
    """
    partial_path = output_path + ".part"
    with DownloadProgressBar(unit='B', unit_scale=True, miniters=1, desc=str_progress) as t:
        urllib.request.urlretrieve(url, filename=partial_path, reporthook=t.update_to)
    # Rename only after a complete transfer; os.replace overwrites atomically
    # when source and destination are on the same filesystem.
    os.replace(partial_path, output_path)
# A URL starts at "https://s3.us-" and runs (without crossing whitespace or
# quotes) up to the first ".mp4" / ".ts" that ends a word.
_VIDEO_URL_RE = re.compile(r"https://s3\.us-[^\s\"']*?\.(?:mp4|ts)\b")
# Splits a URL into "everything up to the last slash" and an ".mp4" file name.
_MP4_PARTS_RE = re.compile(r"(.*/)(.*\.mp4$)")
# Strips the scheme and S3 host; [^/]* keeps the match inside the host so a
# ".com/" appearing later in the object key is not swallowed.
_S3_HOST_RE = re.compile(r"(https://s3[^/]*\.com/)(.*)")


def find_video_urls(text):
    """Return every S3 ``.mp4`` / ``.ts`` URL found in ``text``, in order."""
    return _VIDEO_URL_RE.findall(text)


def split_video_url(url):
    """Split an ``.mp4`` URL into its local ``(directory, file_name)`` pair.

    Both parts are percent-decoded. ``directory`` is the URL path after the
    S3 host (ending in ``/``, or empty for an object at the root).

    Returns:
        ``(directory, file_name)``, or ``None`` when ``url`` does not end in
        ``.mp4`` or does not carry an ``https://s3...com/`` host prefix.
    """
    parts = _MP4_PARTS_RE.search(url)
    if parts is None:
        return None
    file_name = urllib.parse.unquote(parts.group(2))
    host_and_key = _S3_HOST_RE.search(urllib.parse.unquote(parts.group(1)))
    if host_and_key is None:
        return None
    return host_and_key.group(2), file_name


def main(dataset_path="dataset2.txt"):
    """Download every ``.mp4`` listed in ``dataset_path`` that is not on disk yet.

    Files are stored under a relative path that mirrors the URL path after
    the S3 host. ``.ts`` URLs are recognised but skipped.
    NOTE(review): the local path comes straight from the URL, so the dataset
    file is assumed to be trusted - confirm before feeding external input.
    """
    with open(dataset_path, "r", encoding="utf8") as dataset:
        file_content = dataset.read()

    video_list = find_video_urls(file_content)
    total = len(video_list)
    for position, video in enumerate(video_list, start=1):
        target = split_video_url(video)
        if target is None:
            continue
        directory, video_name = target
        output_path = directory + video_name
        str_to_show = "{}/{}-{}".format(position, total, video_name)
        # An empty directory means "current directory"; os.makedirs("") raises.
        if directory:
            os.makedirs(directory, exist_ok=True)
        if not os.path.exists(output_path):
            download_url(video, output_path, str_to_show)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment