Folder uploads for Google Cloud PHP Storage
<?php
require __DIR__ . "/vendor/autoload.php";
use Google\Cloud\Core\Exception\GoogleException;
use Google\Cloud\Storage\StorageClient;
use GuzzleHttp\Promise\Each;
// Create a Storage client
$storage = new StorageClient([
    'projectId' => 'XXXXXXXXXX',
]);
// Define the bucket name and folder path
$bucketName = 'XXXXXXXXXX';
$bucket = $storage->bucket($bucketName);
// create files using following command:
// mkdir -p folder_upload_test
// seq 10 | xargs -I {} bash -c "perl -le 'print \"xyz\" x 1048576' > folder_upload_test/{}.txt"
$folderPath = __DIR__ . '/folder_upload_test';
// Get a list of files in the folder
$files = scandir($folderPath);
// Remove '.' and '..' from the file list
$files = array_diff($files, ['.', '..']);
// Define the maximum number of parallel uploads
$maxParallelUploads = 8;
// Define the chunk size (in bytes)
// $chunkSize = 1024*256; //1024 * 1024 * 10; // 10 MB
// Create an array to store promises
$promises = [];
echo "###########################\n";
echo "####### START #############\n";
echo "###########################\n";
// Upload each file
foreach ($files as $file) {
    if (is_dir($folderPath . '/' . $file)) {
        echo "$file,directory,skip\n";
        continue;
    }
    // Create a promise for uploading the file
    $promises[$file] = function () use ($bucket, $folderPath, $file) {
        // Define the object name (path within the bucket)
        $objectName = 'folder_upload_test/' . basename($file);
        $tstart = time();
        echo "$tstart,$file,start\n";
        // Create a ResumableUploader instance
        $uploader = $bucket->getResumableUploader(
            fopen($folderPath . '/' . $file, 'r'),
            ['name' => $objectName]
        );
        $resumeUri = $uploader->getResumeUri();
        $attempts = 5;
        $object = null;
        while ($attempts > 0) {
            try {
                $object = $uploader->resume($resumeUri);
                if ($object) {
                    break;
                }
            } catch (GoogleException $ex) {
                // Refresh the resume URI and retry on the next iteration
                $resumeUri = $uploader->getResumeUri();
                $attempts--;
            }
        }
        $tend = time();
        if ($object) {
            // Return success message
            echo "$tend,$file,uploaded\n";
            return ($tend - $tstart) . ",Uploaded,$file\n";
        }
        echo "$tend,$file,failed\n";
        return ($tend - $tstart) . ",failed,$file\n";
    };
}
// Wait for all promises to complete
$results = Each::ofLimit(
    $promises,
    $maxParallelUploads,
    function ($result, $idx) {
        // Each entry is a closure; invoke it to perform the upload
        return $result();
    },
    function ($error) {
        echo "Error: " . $error->getMessage() . "\n";
    }
)->wait();
echo "###########################\n";
echo "######### END #############\n";
echo "###########################\n";
// // Process the results
// foreach ($results as $file => $result) {
//     if ($result['state'] === 'fulfilled') {
//         // Handle successful upload
//         echo $result['value'] . PHP_EOL; // Print the success message
//     } else {
//         // Handle upload failure
//         echo "Upload failed for $file: " . $result['reason'] . "\n";
//     }
// }
// echo "Uploaded wait completed: " . json_encode($results, JSON_PRETTY_PRINT) . PHP_EOL;

Setup

➜  Storage git:(main) ✗ ls -l --block-size=M folder_upload_test
total 873M
-rw-r----- 1 vishwarajanand primarygroup 88M Mar  7 20:41 10.txt
-rw-r----- 1 vishwarajanand primarygroup 88M Mar  7 20:41 1.txt
-rw-r----- 1 vishwarajanand primarygroup 88M Mar  7 20:41 2.txt
-rw-r----- 1 vishwarajanand primarygroup 88M Mar  7 20:41 3.txt
-rw-r----- 1 vishwarajanand primarygroup 88M Mar  7 20:41 4.txt
-rw-r----- 1 vishwarajanand primarygroup 88M Mar  7 20:41 5.txt
-rw-r----- 1 vishwarajanand primarygroup 88M Mar  7 20:41 6.txt
-rw-r----- 1 vishwarajanand primarygroup 88M Mar  7 20:41 7.txt
-rw-r----- 1 vishwarajanand primarygroup 88M Mar  7 20:41 8.txt
-rw-r----- 1 vishwarajanand primarygroup 88M Mar  7 20:41 9.txt
➜  Storage git:(main) ✗ 

Resumable upload of files

➜  Storage git:(main) ✗ php folder_upload_resumable.php
###########################
####### START #############
###########################
1709843485,1.txt,start
1709843486,10.txt,start
1709843486,2.txt,start
1709843487,3.txt,start
1709843488,4.txt,start
1709843488,5.txt,start
1709843489,6.txt,start
1709843489,7.txt,start
1709843496,7.txt,uploaded
1709843496,8.txt,start
1709843502,8.txt,uploaded
1709843502,9.txt,start
1709843511,9.txt,uploaded
1709843517,6.txt,uploaded
1709843523,5.txt,uploaded
1709843529,4.txt,uploaded
1709843536,3.txt,uploaded
1709843542,2.txt,uploaded
1709843548,10.txt,uploaded
1709843554,1.txt,uploaded
###########################
######### END #############
###########################
➜  Storage git:(main) ✗ 
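For reference, the retry loop in folder_upload_resumable.php follows the resume-on-failure flow of the google/cloud-storage resumable uploader. A minimal standalone sketch (project ID, bucket, and file names are placeholders, as in the scripts above):

<?php
require __DIR__ . "/vendor/autoload.php";
use Google\Cloud\Core\Exception\GoogleException;
use Google\Cloud\Storage\StorageClient;
$storage = new StorageClient(['projectId' => 'XXXXXXXXXX']);
$bucket = $storage->bucket('XXXXXXXXXX');
$uploader = $bucket->getResumableUploader(
    fopen(__DIR__ . '/folder_upload_test/1.txt', 'r'),
    ['name' => 'folder_upload_test/1.txt']
);
try {
    $object = $uploader->upload();
} catch (GoogleException $ex) {
    // Resume from wherever the interrupted upload left off
    $resumeUri = $uploader->getResumeUri();
    $object = $uploader->resume($resumeUri);
}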

Multipart upload of files (non-blocking IO)

➜  Storage git:(main) ✗ php test_folder_upload_multipart.php
###########################
####### START #############
###########################
1709845694,1.txt,start
1709845694,10.txt,start
1709845694,2.txt,start
1709845694,3.txt,start
1709845694,4.txt,start
1709845694,5.txt,start
1709845694,6.txt,start
1709845694,7.txt,start
1709845701,7.txt,uploaded,7
1709845701,8.txt,start
1709845706,8.txt,uploaded,5
1709845706,9.txt,start
1709845712,9.txt,uploaded,6
1709845712,6.txt,uploaded,18
1709845712,5.txt,uploaded,18
1709845712,4.txt,uploaded,18
1709845712,3.txt,uploaded,18
1709845712,2.txt,uploaded,18
1709845712,10.txt,uploaded,18
1709845712,1.txt,uploaded,18
###########################
######### END #############
###########################
➜  Storage git:(main) ✗ 
<?php
require __DIR__ . "/vendor/autoload.php";
use Google\Cloud\Storage\StorageClient;
use GuzzleHttp\Client;
use GuzzleHttp\Pool;
// Create a Storage client
$storage = new StorageClient();
// Define the bucket name and folder path
$bucketName = 'XXXXXXX';
$bucket = $storage->bucket($bucketName);
// create files using following command:
// mkdir -p folder_upload_test
// seq 10 | xargs -I {} bash -c "perl -le 'print \"xyz\" x 1048576' > folder_upload_test/{}.txt"
$folderPath = __DIR__ . '/folder_upload_test';
// Get a list of files in the folder
$files = scandir($folderPath);
// Remove '.' and '..' from the file list
$files = array_diff($files, ['.', '..']);
// Define the maximum number of parallel uploads
$maxParallelUploads = 8;
// Define the chunk size (in bytes)
// $chunkSize = 1024*256; //1024 * 1024 * 10; // 10 MB
// Create an array to store promises
$promises = [];
echo "###########################\n";
echo "####### START #############\n";
echo "###########################\n";
$ts = time();
echo "$ts,start,ALL\n";
// Upload each file
$requestGenerator = function ($files) use ($bucket, $folderPath) {
    foreach ($files as $file) {
        if (is_dir($folderPath . '/' . $file)) {
            echo "$file,directory,skip\n";
            continue;
        }
        yield $file => function () use ($bucket, $folderPath, $file) {
            // Note: the full local path is used as the object name here
            $objectName = $folderPath . '/' . $file;
            $tstart = time();
            echo "$tstart,start,$file\n";
            return $bucket->uploadAsync(
                fopen($folderPath . '/' . $file, 'r'),
                ['name' => $objectName]
            );
        };
    }
};
// 100 files of 30 MB each took 4.5 minutes for 8 concurrency in EachPromise
// 100 files of 30 MB each took ~40 seconds for 8 concurrency in Guzzle\Pool
// Wait for all promises to complete
// The Client is required by Pool's constructor, but it is not used for
// callables that return their own promises, as the generator above does.
$client = new Client(['base_uri' => 'http://httpbin.org']);
$results = (new Pool(
    $client,
    $requestGenerator($files),
    [
        'concurrency' => $maxParallelUploads, // Maximum number of concurrent uploads
        'fulfilled' => function ($response, $index) {
            // $response is the uploaded StorageObject; $index is the file name yielded above
            $tend = time();
            echo "$tend,uploaded,$index\n";
        },
        'rejected' => function ($reason, $index) {
            // Handle failed upload
            echo "Request $index failed: $reason" . PHP_EOL;
        }
    ]
))->promise()->wait();
$te = time();
$tdiff = $te-$ts;
echo "$te,end,ALL,$tdiff\n";
echo "###########################\n";
echo "######### END #############\n";
echo "###########################\n";
<?php
require __DIR__ . "/vendor/autoload.php";
use Google\Cloud\Storage\StorageClient;
use GuzzleHttp\Promise\Each;
// Create a Storage client
$storage = new StorageClient([
    'projectId' => 'XXXXXXXXXX',
]);
// Define the bucket name and folder path
$bucketName = 'XXXXXXXXXX';
$bucket = $storage->bucket($bucketName);
// create files using following command:
// mkdir -p folder_upload_test
// seq 10 | xargs -I {} bash -c "perl -le 'print \"xyz\" x 1048576' > folder_upload_test/{}.txt"
$folderPath = __DIR__ . '/folder_upload_test';
// Get a list of files in the folder
$files = scandir($folderPath);
// Remove '.' and '..' from the file list
$files = array_diff($files, ['.', '..']);
// Define the maximum number of parallel uploads
$maxParallelUploads = 8;
// Define the chunk size (in bytes)
// $chunkSize = 1024*256; //1024 * 1024 * 10; // 10 MB
// Create an array to store promises
$promises = [];
echo "###########################\n";
echo "####### START #############\n";
echo "###########################\n";
// Upload each file
foreach ($files as $file) {
    if (is_dir($folderPath . '/' . $file)) {
        echo "$file,directory,skip\n";
        continue;
    }
    // Create a promise for uploading the file
    $promises[$file] = function () use ($bucket, $folderPath, $file) {
        // Define the object name (path within the bucket)
        $objectName = 'folder_upload_test/' . basename($file);
        $tstart = time();
        echo "$tstart,$file,start\n";
        // Start an asynchronous upload and wait for it to complete
        $objectPromise = $bucket->uploadAsync(
            fopen($folderPath . '/' . $file, 'r'),
            ['name' => $objectName]
        );
        $object = $objectPromise->wait();
        $tend = time();
        if ($object) {
            // Report success with the elapsed time in seconds
            echo "$tend,$file,uploaded," . ($tend - $tstart) . "\n";
        } else {
            echo "$tend,$file,failed," . ($tend - $tstart) . "\n";
        }
        return $object;
    };
}
// Wait for all promises to complete
$results = Each::ofLimit(
    $promises,
    $maxParallelUploads,
    function ($result, $idx) {
        // Each entry is a closure; invoke it to perform the upload
        return $result();
    },
    function ($error) {
        echo "Error: " . $error->getMessage() . "\n";
    }
)->wait();
echo "###########################\n";
echo "######### END #############\n";
echo "###########################\n";
// // Process the results
// foreach ($results as $file => $result) {
//     if ($result['state'] === 'fulfilled') {
//         // Handle successful upload
//         echo $result['value'] . PHP_EOL; // Print the success message
//     } else {
//         // Handle upload failure
//         echo "Upload failed for $file: " . $result['reason'] . "\n";
//     }
// }
// echo "Uploaded wait completed: " . json_encode($results, JSON_PRETTY_PRINT) . PHP_EOL;
<?php
require __DIR__ . "/vendor/autoload.php";
use Google\Cloud\Storage\StorageClient;
use GuzzleHttp\Promise;
// Create a Storage client
$storage = new StorageClient();
// Define the bucket name and folder path
$bucketName = 'XXXXXXXXXXXXX';
$bucket = $storage->bucket($bucketName);
// create files using following command:
// mkdir -p folder_upload_test
// seq 10 | xargs -I {} bash -c "perl -le 'print \"xyz\" x 1048576' > folder_upload_test/{}.txt"
$folderPath = __DIR__ . '/folder_upload_test';
// Get a list of files in the folder
$files = scandir($folderPath);
// Remove '.' and '..' from the file list
$files = array_diff($files, ['.', '..']);
// Define the maximum number of parallel uploads
$maxParallelUploads = 8;
// Create an array to store promises
$promises = [];
echo "###########################\n";
echo "####### START #############\n";
echo "###########################\n";
$ts = time();
echo "$ts,start,ALL\n";
// Upload each file
$promises = [];
foreach ($files as $file) {
    if (is_dir($folderPath . '/' . $file)) {
        echo "$file,directory,skip\n";
        continue;
    }
    // Note: the full local path is used as the object name here
    $objectName = $folderPath . '/' . $file;
    $tstart = time();
    echo "$tstart,start,$file\n";
    $promises[] = $bucket->uploadAsync(
        fopen($folderPath . '/' . $file, 'r'),
        ['name' => $objectName]
    );
}
// Option A: wait for all uploads to complete; unwrap() re-throws the first
// rejection reason if any of the uploads fail
$responses = Promise\Utils::unwrap($promises);
// Option B: wait for all uploads to settle, even if some of them fail
$responses = Promise\Utils::settle($promises)->wait();
// 100 files of 30 MB each took 4.5 minutes for 8 concurrency in EachPromise
// 100 files of 30 MB each took ~40 seconds for 8 concurrency in Guzzle\Pool
// 100 files of 30 MB each took ~10 seconds for Utils::unwrap or settle
$te = time();
$tdiff = $te-$ts;
echo "$te,end,ALL,$tdiff\n";
echo "###########################\n";
echo "######### END #############\n";
echo "###########################\n";