Created
March 11, 2021 04:48
-
-
Save adrianjguerrero/aaf251c0a457eb8d787dbfac97bc9856 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import urllib.parse
import urllib.request

# pip install tqdm
from tqdm import tqdm
class DownloadProgressBar(tqdm):
    """A tqdm bar that can be driven by absolute progress reports.

    ``update_to`` has the (block count, block size, total size) signature
    that ``download_url`` passes to ``urlretrieve`` as its ``reporthook``.
    """

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to the absolute position ``b * bsize``.

        Args:
            b: Number of blocks reported so far.
            bsize: Size of one block.
            tsize: Total size; when not ``None`` it becomes the bar's total.
        """
        if tsize is not None:
            self.total = tsize
        # tqdm.update() takes an increment, so convert the absolute position
        # into the distance from the bar's current counter (self.n).
        increment = b * bsize - self.n
        self.update(increment)
def download_url(url, output_path, str_progress):
    """Download ``url`` to ``output_path`` while showing a progress bar.

    The data is first written to ``<output_path>.part`` and only moved to
    ``output_path`` once the transfer has finished, so an interrupted or
    failed download never leaves a truncated file under the final name
    (which an "already exists" check would mistake for a finished download).

    Args:
        url: Address of the resource to fetch.
        output_path: Destination file path; its directory must already exist.
        str_progress: Label displayed next to the progress bar.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises (for example
        ``urllib.error.URLError`` or ``urllib.error.ContentTooShortError``);
        the partial file is removed before the exception propagates.
    """
    # An f-string (rather than "+") also accepts path-like objects.
    partial_path = f"{output_path}.part"
    try:
        with DownloadProgressBar(unit='B', unit_scale=True, miniters=1, desc=str_progress) as t:
            urllib.request.urlretrieve(url, filename=partial_path, reporthook=t.update_to)
    except BaseException:
        # BaseException so that Ctrl+C (KeyboardInterrupt) is cleaned up too.
        try:
            os.remove(partial_path)
        except OSError:
            # Nothing was written yet, or the file is already gone; the
            # original error is the one worth reporting.
            pass
        raise
    # Same-directory rename: the final name only ever holds a complete file.
    os.replace(partial_path, output_path)
# Candidate video URLs: an S3 "us-*" address running up to the last ".mp4" or
# ".ts" on the same line.  (The previous "[mp4|ts]" was a character class that
# matched any single one of the characters m, p, 4, |, t, s.)
_VIDEO_URL_RE = re.compile(r'https://s3\.us-.*\.(?:mp4|ts)')
# Splits a URL into "everything up to the last slash" and an ".mp4" file name.
_MP4_NAME_RE = re.compile(r'(.*/)(.*\.mp4$)')
# Splits the decoded URL prefix into the S3 host part and the object's folder.
_S3_FOLDER_RE = re.compile(r'(https://s3.*\.com/)(.*)')


def find_video_urls(text):
    """Return every candidate video URL found in ``text``, in order."""
    return _VIDEO_URL_RE.findall(text)


def resolve_local_target(video):
    """Map a video URL to ``(directory, video_name)`` on the local disk.

    Both parts are percent-decoded.  ``directory`` is the URL path that
    follows the ``.com/`` host part; it is either empty or ends with ``/``.

    Returns ``None`` when the URL does not end in ``.mp4`` or when no
    ``https://s3....com/`` prefix can be found, meaning the URL is skipped.
    """
    name_match = _MP4_NAME_RE.search(video)
    if name_match is None:
        return None
    video_name = urllib.parse.unquote(name_match.group(2))
    folder_match = _S3_FOLDER_RE.search(urllib.parse.unquote(name_match.group(1)))
    if folder_match is None:
        # Previously this case crashed with AttributeError on ``None.group``.
        return None
    return folder_match.group(2), video_name


def main():
    """Download every ``.mp4`` listed in ``dataset2.txt`` that is not on disk yet.

    Files are stored under a local folder tree mirroring the URL path, and
    each download is labelled ``<position>/<total>-<file name>``.
    """
    # The context manager closes the file as soon as it has been read.
    with open("dataset2.txt", "r", encoding="utf8") as dataset:
        file_content = dataset.read()

    video_list = find_video_urls(file_content)
    total = len(video_list)
    # enumerate() gives the true position even for duplicated URLs and avoids
    # the quadratic list.index() lookup.
    for position, video in enumerate(video_list, start=1):
        target = resolve_local_target(video)
        if target is None:
            continue
        directory, video_name = target
        output_path = directory + video_name
        str_to_show = f"{position}/{total}-{video_name}"
        # An empty directory means "current folder"; os.makedirs("") raises.
        if directory:
            os.makedirs(directory, exist_ok=True)
        if not os.path.exists(output_path):
            download_url(video, output_path, str_to_show)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment