Last active
April 13, 2023 19:33
-
-
Save PDXIII/7466392 to your computer and use it in GitHub Desktop.
This script downloads files from a given directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-*- coding: iso-8859-1 -*- | |
import sys | |
import os | |
import requests | |
from sets import Set | |
'''
This script downloads all images found under a given URL path
whose file names follow the scheme: a word, a trailing number and a file extension,
like 'someName-000.jpg' or 'anotherName00000.png'.
'''
def main():
    """Ask for an image URL and a folder name, then download the image series.

    The user is prompted for the URL of one image of a numbered collection
    and for the name of a folder. The folder is created inside the current
    user's ``Downloads`` directory when it does not exist yet, and the
    download is started into it.

    Returns:
        0 once the download loop has finished.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    # first the script asks for the URL of an image in the collection
    firstImageURL = ask('Please enter URL of the first image: ')
    # break the URL into name, digit count, extension and directory URL
    fileInfo = getFileInfo(firstImageURL)

    # Resolve the home directory at run time instead of using a hard-coded
    # '/Users/[USERNAME]' placeholder that had to be edited by hand.
    downloadsRoot = os.path.join(os.path.expanduser('~'), 'Downloads')
    folderName = ask('Name a new folder for the download: ')
    downloadDirectory = os.path.join(downloadsRoot, folderName)

    # create the requested directory
    if not os.path.exists(downloadDirectory):
        os.makedirs(downloadDirectory)

    getDownload(fileInfo, downloadDirectory)
    print('READY')
    return 0
def getFileInfo(firstImageURL):
    """Split an image URL into the pieces needed to build sibling file URLs.

    Args:
        firstImageURL: URL of one image of the collection, for example
            'http://example.com/pics/someName-000.jpg'.

    Returns:
        A dict with the keys
        'name'      -- file name stem without its trailing number,
        'digits'    -- how many trailing digits the stem had,
        'extension' -- text after the last dot ('' if there is no dot),
        'path'      -- the URL up to and including the last '/'.
    """
    # Cut at the LAST slash only. Removing the file name with str.replace
    # would also strip identical text occurring earlier in the URL.
    directoryURL, slash, givenFileName = firstImageURL.rpartition('/')
    directoryURL += slash

    # Cut at the LAST dot so stems containing dots ('my.pic-001.jpg') stay
    # intact instead of being truncated at the first dot.
    stem, dot, extension = givenFileName.rpartition('.')
    if not dot:
        # no dot at all: the whole file name is the stem
        stem, extension = givenFileName, ''

    originalName = getOriginalName(stem)
    # a dictionary keeps all the info and lets us get the values by keys
    fileInfo = {
        'name': originalName[0],
        'digits': originalName[1],
        'extension': extension,
        'path': directoryURL,
    }
    return fileInfo
def getOriginalName(someName):
    """Separate a file name stem from its trailing run of ASCII digits.

    Args:
        someName: file name without extension, e.g. 'someName-000'.

    Returns:
        A two-item list ``[prefix, digitCount]`` where ``prefix`` is
        ``someName`` without its trailing digits and ``digitCount`` is the
        number of digits removed, e.g. ``['someName-', 3]``. A name without
        trailing digits is returned unchanged with a count of 0.
    """
    asciiDigits = frozenset('0123456789')
    digits = 0
    # checking from behind, one character at a time
    while digits < len(someName) and someName[-1 - digits] in asciiDigits:
        digits += 1
    # Slice by absolute position: someName[:-digits] would collapse to ''
    # when digits is 0 and lose the whole name.
    return [someName[:len(someName) - digits], digits]
def getDownload(fileInfo, downloadDirectory, errorMax=10, timeout=30):
    """Download consecutively numbered files using the requests library.

    File names are built as name + zero-padded number + '.' + extension,
    starting at number 0, and requested below ``fileInfo['path']``. Every
    response with status 200 is written to ``downloadDirectory``. The loop
    ends after ``errorMax`` failures in a row; a successful download resets
    the failure counter, because files are sometimes not numbered without
    gaps.

    Args:
        fileInfo: dict with the keys 'name', 'digits', 'extension', 'path'.
        downloadDirectory: existing directory the files are written to.
        errorMax: number of consecutive failures that stops the loop.
        timeout: seconds passed to ``requests.get`` so that a stalled
            server cannot block the script forever.
    """
    fileNumber = 0
    errorCount = 0
    while errorCount < errorMax:
        # generating the file name; zfill() pads the number with zeros
        paddedNumber = str(fileNumber).zfill(fileInfo['digits'])
        fileName = fileInfo['name'] + paddedNumber + '.' + fileInfo['extension']
        fileNumber += 1
        try:
            r = requests.get(fileInfo['path'] + fileName, timeout=timeout)
            # only save the file when the server answers with the right status
            if r.status_code == 200:
                downloadPath = os.path.join(downloadDirectory, fileName)
                # Images are binary data, so open in 'wb'; the with block
                # closes the file even when writing fails.
                with open(downloadPath, 'wb') as downloadFile:
                    downloadFile.write(r.content)
                print('Downloaded file: ' + fileName)
                # resetting the error counter
                errorCount = 0
            else:
                errorCount += 1
                print('Error No: ' + str(errorCount))
                print('ERROR while downloading file: ' + fileName)
        except Exception as e:
            # Deliberately broad: any failure for one file is counted and
            # reported, and the loop moves on to the next file number.
            errorCount += 1
            print('Error No: ' + str(errorCount))
            print('Error:  %s' % (e,))
# Run the interactive downloader only when this file is executed as a script,
# and hand the value returned by main() to the interpreter as exit status.
if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment