Last active
August 31, 2021 02:10
-
-
Save bkaankuguoglu/e14d90d84c877d8f734153749a60eabf to your computer and use it in GitHub Desktop.
File Collector - Collects image files from the server via SOAP requests.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#========================================================================# | |
# file_collector.py # | |
#========================================================================# | |
#========================================================================# | |
# usage: file_collector.py [-h] [-f FILE] [-o OUTPUT_FOLDER] # | |
#========================================================================# | |
# optional arguments: | |
# -h, --help show this help message and exit | |
# -f FILE, --file FILE the xlsx file that contains all the files to be | |
# collected | |
# -o OUTPUT_FOLDER, --output OUTPUT_FOLDER | |
# the folder in which the collected files are stored | |
# | |
#========================================================================# | |
# Sample usage: # | |
#========================================================================# | |
# 1. Move to the directory where the script is placed. | |
# cd "path/to/folder" | |
# 2. To collect the files listed in the excel file "example.xlsx" and | |
# save them into the folder "output" | |
# python file_collector.py -f "path/to/sample.xlsx" -o "output" | |
#========================================================================# | |
from openpyxl import * | |
from zeep import * | |
import os | |
import argparse | |
out_path = "images/" | |
res_path = "results/" | |
wsdl = "path/to/wsdl" | |
client = Client(wsdl) | |
base64_encoded_images = {} | |
username = "username" | |
password = "password" | |
doc_id = "doc_id" | |
def decode(base64_string, doc, file_id, img_format, out_folder): | |
doc_type = ''.join(e for e in doc if e.isalnum()) | |
try: | |
# Attempt to convert the string | |
image_64_decode = base64_string | |
out_dir = out_path + out_folder + "/" | |
if not os.path.exists(out_dir): | |
os.makedirs(out_dir) | |
# Create a writable image file | |
image_result = open(out_dir + doc_type + "_" + file_id + "." + img_format, 'wb') | |
# Write the decoded result | |
image_result.write(image_64_decode) | |
# Close the file after writing image | |
image_result.close() | |
return image_64_decode | |
except: | |
print('exception occured in decoding image from ' + str(base64_string)) | |
raise | |
def read_images(worksheet): | |
wb = load_workbook(worksheet) | |
ws = wb['Export Worksheet'] | |
images = {} | |
for c in ws.iter_rows(row_offset=1): | |
if c[0].value == None: | |
return images | |
file_id = c[0].value | |
doc_type = c[1].value | |
doc_date = c[2].value | |
doc_id = c[3].value | |
images[doc_id] = [file_id, doc_type, doc_date, doc_id] | |
# print([file_id, doc_type, doc_date, doc_id]) | |
return images | |
def read_doc_ids(worksheet): | |
wb = load_workbook(worksheet) | |
ws = wb['Export Worksheet'] | |
doc_ids = [] | |
for c in ws.iter_rows(row_offset=1): | |
if c[3].value == None: | |
return doc_ids | |
doc_ids.append(c[3].value) | |
return doc_ids | |
def send_request(doc_id, cl): | |
r = cl.service.GetDocumentWithID(document={'username': username, | |
'password': password, | |
'doc_id': doc_id}) | |
return r | |
def send_raw_request(doc_id, cl): | |
with cl.options(raw_response=True): | |
r = cl.service.GetDocumentWithID(document={'username': username, | |
'password': password, | |
'doc_id': doc_id}) | |
return r | |
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1, length=100, fill='█'): | |
percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total))) | |
filled_length = int(length * iteration // total) | |
bar = fill * filled_length + '-' * (length - filled_length) | |
print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r') | |
# Print New Line on Complete | |
if iteration == total: | |
print() | |
def show_license(): | |
print("\n# ======================================================================================= #\n" | |
"# File Collector - Collects image files from the server via SOAP requests #\n" + | |
"# Copyright © 2018 - Berk Kaan Kuguoglu #\n" + | |
"# ======================================================================================= #\n" + | |
"# This program comes with absolutely no warranty. #\n" + | |
"# This is free software, and you are welcome to redistribute it under certain conditions. #\n" + | |
"# ======================================================================================= #\n" + | |
"# usage: python file_collector.py [-h] [-f FILE] [-o OUTPUT_FOLDER] #\n" + | |
"# ======================================================================================= #\n" + | |
"# optional arguments: #\n" + | |
"# -h, --help show this help message and exit #\n" + | |
"# -f FILE, --file the xlsx file that contains all the files to be collected #\n" + | |
"# -o OUTPUT_FOLDER, --output the folder in which the collected files are stored #\n" + | |
"# ======================================================================================= #\n") | |
def main(args): | |
xlsx_file = args.input_sheet | |
out_folder = args.output_folder | |
# print(xlsx_file) | |
# print(out_folder) | |
# loop over the images to find the template in | |
images = read_images(xlsx_file) | |
doc_ids = read_doc_ids(xlsx_file) | |
image_ids = [] | |
num_of_files = len(doc_ids) | |
i = 1 | |
for doc_id in doc_ids: | |
percentage = 100 * float(i) / float(num_of_files) | |
# print(">>> Collecting files... " + | |
# "[" + str(i) + "/" + str(num_of_files) + "] --- " + | |
# format(percentage, '.2f') + "%") | |
prefix = ">>> Collecting files... " + \ | |
"[" + str(i) + "/" + str(num_of_files) + "]" | |
printProgressBar(i, num_of_files, prefix=prefix + ' Progress:', suffix='Complete', length=25) | |
r = send_request(doc_id, client) | |
img_format = r['mimeType'] | |
extension = img_format.split('/')[-1] | |
if extension in {"jpeg", "jpg", "png", "JPEG", "JPG"}: | |
encoded_img = r['data'] | |
base64_encoded_images[doc_id] = [encoded_img, extension] | |
image_ids.append(doc_id) | |
i += 1 | |
i = 1 | |
num_of_images = len(image_ids) | |
for doc_id in image_ids: | |
prefix = ">>> Writing files... [" + str(i) + "/" + str(num_of_images) + "]" | |
printProgressBar(i, num_of_images, prefix=prefix + ' Progress:', suffix='Complete', length=25) | |
img = base64_encoded_images.get(doc_id)[0] | |
img_format = base64_encoded_images.get(doc_id)[1] | |
doc_type = images.get(doc_id)[1] | |
file_id = images.get(doc_id)[3] | |
decode(img, doc_type, file_id, img_format, out_folder) | |
i += 1 | |
return | |
if __name__ == "__main__": | |
# execute only if run as a script | |
parser = argparse.ArgumentParser() | |
# Add more options if you like | |
parser.add_argument("-f", "--file", dest="input_sheet", | |
help="the xlsx file that contains all the files to be collected", metavar="FILE") | |
parser.add_argument("-o", "--output", dest="output_folder", | |
help="the folder in which the collected files are stored", metavar="OUTPUT_FOLDER") | |
args = parser.parse_args() | |
show_license() | |
main(args) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment