Created
October 11, 2023 05:44
-
-
Save yuhr123/7acb7e6bb42fb0ff12f3ba64d2cdd7da to your computer and use it in GitHub Desktop.
Script for testing the write performance of MinIO, S3FS-FUSE, and JuiceFS.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import time | |
import argparse | |
# Create a single file filled with random bytes
def create_local_file(file_number, local_directory, file_size_mb):
    """Write one file of random bytes into ``local_directory``.

    The file is named ``sample_file_<file_number>.txt`` and is exactly
    ``file_size_mb`` MiB long. An existing file with the same name is
    overwritten.

    Args:
        file_number: Value interpolated into the file name.
        local_directory: Directory the file is created in; it must already
            exist.
        file_size_mb: Size of the file in MiB (non-negative integer).

    Raises:
        ValueError: If ``file_size_mb`` is negative.
        OSError: If the file cannot be opened or written.
    """
    # Reject bad sizes before touching the filesystem so no stray empty file
    # is left behind.
    if file_size_mb < 0:
        raise ValueError("file_size_mb must be non-negative")

    # Generate the payload 1 MiB at a time so memory use stays constant
    # regardless of the requested file size.
    chunk_size = 1024 * 1024
    local_file_name = f'sample_file_{file_number}.txt'
    local_file_path = os.path.join(local_directory, local_file_name)
    with open(local_file_path, 'wb') as file:
        for _ in range(file_size_mb):
            file.write(os.urandom(chunk_size))
# Measure the total time taken to write all the files
def write_to_local_files(num_files, local_directory, file_size_mb):
    """Create ``num_files`` files sequentially and return the elapsed time.

    Files are numbered from 1 to ``num_files`` and each one is produced by
    ``create_local_file``.

    Args:
        num_files: How many files to create; zero or less creates none.
        local_directory: Directory passed through to ``create_local_file``.
        file_size_mb: Per-file size passed through to ``create_local_file``.

    Returns:
        Elapsed time in seconds as a float.
    """
    # perf_counter is monotonic and high-resolution, so the measurement is
    # not disturbed by system clock adjustments during a long run.
    start_time = time.perf_counter()
    for i in range(1, num_files + 1):
        create_local_file(i, local_directory, file_size_mb)
    return time.perf_counter() - start_time
if __name__ == "__main__":
    # Command-line entry point: parse options, run the benchmark, report.
    parser = argparse.ArgumentParser(description="Create local files with specified options.")
    parser.add_argument("-n", "--num_files", type=int, default=10000, help="Number of files to create")
    parser.add_argument("-d", "--local_directory", type=str, default="/root/mnt-juicefs/", help="Local directory to create files in")
    parser.add_argument("-s", "--file_size_mb", type=int, default=1, help="Size of each file in megabytes")
    args = parser.parse_args()

    num_files = args.num_files
    local_directory = args.local_directory
    file_size_mb = args.file_size_mb

    # Fail fast with a usage error instead of a traceback (or a misleading
    # "Created -5 files" report) when the inputs cannot produce a valid run.
    if num_files < 0:
        parser.error("--num_files must be non-negative")
    if file_size_mb < 0:
        parser.error("--file_size_mb must be non-negative")
    if not os.path.isdir(local_directory):
        parser.error(f"local directory does not exist: {local_directory}")

    elapsed_seconds = write_to_local_files(num_files, local_directory, file_size_mb)

    print(f"Created {num_files} {file_size_mb}MB files in {local_directory}")
    print(f"Total time taken: {elapsed_seconds:.2f} seconds")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import timeit | |
import pandas as pd | |
# Seed the one-row CSV file under /root/mnt-juicefs that the benchmark loop
# reads and rewrites.
df = pd.DataFrame({"column1": ["new_value1"], "column2": ["new_value2"]})
df.to_csv("/root/mnt-juicefs/test-data.csv", index=False)
def process_s3fs(csv_path='/root/mnt-juicefs/test-data.csv', iterations=100):
    """Repeatedly read a CSV file, append one row, and write it all back.

    Each iteration reads the whole file, so the amount of data read and
    written grows with every pass.

    Args:
        csv_path: CSV file to read and overwrite. It must already exist and
            contain the columns ``column1`` and ``column2``.
        iterations: Number of read-append-write cycles to perform.
    """
    for i in range(iterations):
        # Progress indicator for long runs
        print(i)
        # Read the existing data
        df = pd.read_csv(csv_path)
        # Append a new row
        new_df = pd.concat([df, pd.DataFrame([{"column1": f"value{i}", "column2": f"value{i}"}])], ignore_index=True)
        # Write the data back to the file
        new_df.to_csv(csv_path, index=False)
# number=1: run the complete read-append-write loop once and time that run.
execution_time = timeit.timeit(process_s3fs, number=1)
print(f"Execution time: {execution_time:.2f} seconds")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import timeit | |
import fsspec | |
import s3fs | |
import pandas as pd | |
# Connection settings for the "s3" protocol, stored in fsspec's global config.
# NOTE(review): presumably fsspec applies these as default arguments when an
# S3 filesystem is created, including the ones pandas opens for s3:// URLs -
# confirm against the fsspec configuration docs.
# NOTE(review): the key and secret are hard-coded; acceptable only for a
# throwaway test endpoint. Do not reuse this pattern with real credentials.
fsspec.config.conf = {
    "s3": {
        "key": "admin",
        "secret": "abc123abc",
        "client_kwargs": {
            "endpoint_url": "http://127.0.0.1:9000"
        }
    }
}
s3 = s3fs.S3FileSystem()

# Seed the one-row CSV object that the benchmark loop reads and rewrites.
df = pd.DataFrame({"column1": ["new_value1"], "column2": ["new_value2"]})
df.to_csv("s3://myjfs/test-data.csv", index=False)
def process_s3(csv_path='s3://myjfs/test-data.csv', iterations=100):
    """Repeatedly read a CSV object, append one row, and write it all back.

    Each iteration reads the whole object, so the amount of data transferred
    grows with every pass.

    Args:
        csv_path: Path or URL of the CSV to read and overwrite. It must
            already exist and contain the columns ``column1`` and
            ``column2``.
        iterations: Number of read-append-write cycles to perform.
    """
    for i in range(iterations):
        # Progress indicator for long runs
        print(i)
        # Read the existing data
        df = pd.read_csv(csv_path)
        # Append a new row
        new_df = pd.concat([df, pd.DataFrame([{"column1": f"value{i}", "column2": f"value{i}"}])], ignore_index=True)
        # Write the data back to the file
        new_df.to_csv(csv_path, index=False)
# number=1: run the complete read-append-write loop once and time that run.
execution_time = timeit.timeit(process_s3, number=1)
print(f"Execution time: {execution_time:.2f} seconds")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import timeit | |
import fsspec | |
import s3fs | |
import pandas as pd | |
# Connection settings for the "s3" protocol, stored in fsspec's global config.
# NOTE(review): presumably fsspec applies these as default arguments when an
# S3 filesystem is created, including the ones pandas opens for s3:// URLs -
# confirm against the fsspec configuration docs.
# NOTE(review): the key and secret are hard-coded; acceptable only for a
# throwaway test endpoint. Do not reuse this pattern with real credentials.
fsspec.config.conf = {
    "s3": {
        "key": "admin",
        "secret": "abc123abc",
        "client_kwargs": {
            "endpoint_url": "http://172.16.254.18:9000"
        }
    }
}
s3 = s3fs.S3FileSystem()

# Seed the one-row CSV object that the benchmark loop reads and rewrites.
df = pd.DataFrame({"column1": ["new_value1"], "column2": ["new_value2"]})
df.to_csv("s3://test-minio/test-data.csv", index=False)
def process_s3(csv_path='s3://test-minio/test-data.csv', iterations=100):
    """Repeatedly read a CSV object, append one row, and write it all back.

    Each iteration reads the whole object, so the amount of data transferred
    grows with every pass.

    Args:
        csv_path: Path or URL of the CSV to read and overwrite. It must
            already exist and contain the columns ``column1`` and
            ``column2``.
        iterations: Number of read-append-write cycles to perform.
    """
    for i in range(iterations):
        # Progress indicator for long runs
        print(i)
        # Read the existing data
        df = pd.read_csv(csv_path)
        # Append a new row
        new_df = pd.concat([df, pd.DataFrame([{"column1": f"value{i}", "column2": f"value{i}"}])], ignore_index=True)
        # Write the data back to the file
        new_df.to_csv(csv_path, index=False)
# number=1: run the complete read-append-write loop once and time that run.
execution_time = timeit.timeit(process_s3, number=1)
print(f"Execution time: {execution_time:.2f} seconds")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import timeit | |
import pandas as pd | |
# Seed the one-row CSV file under /root/mnt-s3fs that the benchmark loop
# reads and rewrites.
df = pd.DataFrame({"column1": ["new_value1"], "column2": ["new_value2"]})
df.to_csv("/root/mnt-s3fs/test-data.csv", index=False)
def process_s3fs(csv_path='/root/mnt-s3fs/test-data.csv', iterations=100):
    """Repeatedly read a CSV file, append one row, and write it all back.

    Each iteration reads the whole file, so the amount of data read and
    written grows with every pass.

    Args:
        csv_path: CSV file to read and overwrite. It must already exist and
            contain the columns ``column1`` and ``column2``.
        iterations: Number of read-append-write cycles to perform.
    """
    for i in range(iterations):
        # Progress indicator for long runs
        print(i)
        # Read the existing data
        df = pd.read_csv(csv_path)
        # Append a new row
        new_df = pd.concat([df, pd.DataFrame([{"column1": f"value{i}", "column2": f"value{i}"}])], ignore_index=True)
        # Write the data back to the file
        new_df.to_csv(csv_path, index=False)
# number=1: run the complete read-append-write loop once and time that run.
execution_time = timeit.timeit(process_s3fs, number=1)
print(f"Execution time: {execution_time:.2f} seconds")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment