Skip to content

Instantly share code, notes, and snippets.

@hossainel
Created January 3, 2022 05:52
Show Gist options
  • Save hossainel/0d36a86246c83dc406897464cfc5b460 to your computer and use it in GitHub Desktop.
Save hossainel/0d36a86246c83dc406897464cfc5b460 to your computer and use it in GitHub Desktop.
from tqdm import tqdm
import requests
# Download a single large file to the current directory in fixed-size chunks,
# showing a byte-based progress bar and reporting a short download at the end.

# URL of the file to download
url = 'https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-stub-meta-history.xml.gz' #file size is '70.5GB'
# Number of bytes requested per chunk from iter_content (1024**2 bytes = 1 MiB)
block_size = 1024**2
# stream=True keeps requests from loading the whole body into memory up front.
# The timeout stops a stalled server from hanging the script indefinitely.
# Using the response as a context manager releases the connection even if the
# download loop raises.
with requests.get(url, stream=True, timeout=30) as response:
    # Abort on 4xx/5xx rather than saving an error page under the dump's name.
    response.raise_for_status()
    # Expected size in bytes; 0 when the server sent no Content-Length header.
    t = int(response.headers.get('content-length', 0))
    # Both the progress bar and the output file are closed on any exit path.
    with tqdm(total=t, unit='iB', unit_scale=True) as progress_bar, \
            open('enwiki-latest-stub-meta-history.xml.gz', 'wb') as file:
        for data in response.iter_content(block_size):
            progress_bar.update(len(data))
            file.write(data)
# The size check is only meaningful when the server announced a length;
# progress_bar.n holds the total number of bytes counted by update().
if t != 0 and progress_bar.n != t:
    print("ERROR downloading file!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment