Last active
December 13, 2023 22:01
-
-
Save suanmiao/f9e77ca96a218974408f41f013fd5924 to your computer and use it in GitHub Desktop.
oci_download_multi_threads.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import oci | |
import os | |
import sys | |
import concurrent.futures | |
def download_file(object_storage_client, namespace: str, bucket_name: str, object_name: str, local_directory: str) -> None:
    """Download a single object into ``local_directory``.

    The local file is named after the last ``/``-separated component of
    ``object_name``. Leading "folder" components are dropped, so two objects
    that share a final component are written to the same local path.
    Object names ending in ``/`` have an empty final component and are
    skipped without contacting the service.

    Args:
        object_storage_client: Client exposing
            ``get_object(namespace, bucket_name, object_name)``. The
            returned response's ``data`` must provide ``raw.stream(...)``
            and ``close()``.
        namespace: Object Storage namespace passed to ``get_object``.
        bucket_name: Bucket passed to ``get_object``.
        object_name: Full name of the object to fetch.
        local_directory: Existing directory that receives the file.

    Raises:
        OSError: If the local file cannot be opened or written.
        Exception: Whatever ``get_object`` or the stream raises is propagated.
    """
    file_name = object_name.rsplit('/', 1)[-1]
    if not file_name:
        # Trailing "/" -> directory placeholder, nothing to download.
        return

    local_file_path = os.path.join(local_directory, file_name)
    get_obj = object_storage_client.get_object(namespace, bucket_name, object_name)
    try:
        with open(local_file_path, 'wb') as f:
            # Copy in 1 MiB chunks so the whole object is never held in memory.
            for chunk in get_obj.data.raw.stream(1024 * 1024, decode_content=False):
                f.write(chunk)
    finally:
        # Always release the underlying response, including when the write
        # or the stream fails part-way through.
        get_obj.data.close()
def download_directory(object_storage_client, namespace: str, bucket_name: str, oci_prefix: str, local_directory: str, max_workers: int = 5) -> None:
    """Download every object whose name starts with ``oci_prefix``.

    The listing is followed across all result pages, then each object is
    handed to ``download_file`` on a thread pool.

    Args:
        object_storage_client: Client exposing ``list_objects`` (and
            whatever ``download_file`` needs).
        namespace: Object Storage namespace.
        bucket_name: Bucket to list and download from.
        oci_prefix: Object-name prefix used to filter the listing.
        local_directory: Destination directory; created if missing.
        max_workers: Number of concurrent download threads (default 5).

    Raises:
        Exception: The first exception raised by any download is re-raised
            once its future is collected.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(local_directory, exist_ok=True)

    # list_objects returns one page at a time; keep requesting pages from
    # ``next_start_with`` until the service reports no further page.
    object_names = []
    start = None
    while True:
        list_kwargs = {"prefix": oci_prefix}
        if start is not None:
            list_kwargs["start"] = start
        page = object_storage_client.list_objects(namespace, bucket_name, **list_kwargs).data
        object_names.extend(obj.name for obj in page.objects)
        start = page.next_start_with
        if not start:
            break

    # Use ThreadPoolExecutor for parallel downloads
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(
                download_file,
                object_storage_client,
                namespace,
                bucket_name,
                name,
                local_directory,
            )
            for name in object_names
        ]
        for future in concurrent.futures.as_completed(futures):
            # result() re-raises any exception from the worker thread.
            future.result()
if __name__ == "__main__":
    # Command-line entry point:
    #   python oci_download.py oci://<bucket_name>/<path> <local_directory>
    if len(sys.argv) != 3:
        print("Usage: python oci_download.py oci://<bucket_name>/<path> <local_directory>")
        sys.exit(1)

    oci_path = sys.argv[1]
    local_directory = sys.argv[2]

    # Validate the URI instead of indexing into split('/') results, which
    # raised IndexError on short input and silently picked the wrong bucket
    # for paths that did not start with "oci://".
    oci_scheme = "oci://"
    if not oci_path.startswith(oci_scheme):
        print("Invalid OCI path {!r}: expected oci://<bucket_name>/<path>".format(oci_path))
        sys.exit(1)

    # Everything up to the first "/" is the bucket; the remainder (possibly
    # empty) is the object-name prefix.
    bucket_name, _, oci_prefix = oci_path[len(oci_scheme):].partition('/')
    if not bucket_name:
        print("Invalid OCI path {!r}: bucket name is missing".format(oci_path))
        sys.exit(1)

    # Load the SDK configuration via oci.config.from_file() with its defaults.
    config = oci.config.from_file()
    object_storage_client = oci.object_storage.ObjectStorageClient(config)
    namespace = object_storage_client.get_namespace().data

    download_directory(object_storage_client, namespace, bucket_name, oci_prefix, local_directory)
    print("Download completed.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment