<!--
see https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-multiple-volumes
see src/Disks/DiskFactory.h
see src/Disks/StoragePolicy.cpp
-->
<clickhouse>
<storage_configuration>
<disks>
<!--
local disk.
a local disk with name 'default' is special: its path must be specified in <clickhouse><path>
-->
<disk_local>
<type>local</type>
<path>/mnt/data/xxx/</path>
<keep_free_space_bytes>1024</keep_free_space_bytes>
<!-- Or -->
<keep_free_space_ratio>0.2</keep_free_space_ratio>
<!-- skip access check (write some content to a random file and read it back), for readonly disks -->
<skip_access_check>false</skip_access_check>
</disk_local>
<!-- encrypted disk -->
<disk_encrypted>
<type>encrypted</type>
<!-- encrypted disk is a *wrapper* over another disk -->
<disk>disk_local</disk>
<!-- path to the location on the disk where the data will be saved. If not specified, the data will be saved in the root directory (i.e. <disk_local>.path). must end with '/' -->
<path>data_encrypted/</path>
<!-- algorithms:
AES_128_CTR, /// Size of key is 16 bytes.
AES_192_CTR, /// Size of key is 24 bytes.
AES_256_CTR, /// Size of key is 32 bytes.
-->
<algorithm>AES_128_CTR</algorithm>
<key id="0">_16_ascii_chars_</key>
<key_hex id="1">00112233445566778899AABBCCDDEEFF</key_hex>
<!-- The key used for encryption. All the specified keys can be used for decryption, and you can always
switch to another key while maintaining access to previously encrypted data. -->
<current_key_id>0</current_key_id>
<!-- skip access check (write some content to a random file and read it back), for readonly disks -->
<skip_access_check>false</skip_access_check>
</disk_encrypted>
<!-- s3 disk -->
<disk_s3>
<type>s3</type>
<!-- skip access check (write some content to a random file and read it back), for readonly disks -->
<skip_access_check>false</skip_access_check>
<region>east</region>
<!--
see https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access
or https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access
endpoint URI must end with '/'. Can contain macros.
-->
<endpoint>http://endpoint.com/uri/</endpoint>
<!--
path to store metadata. defaults to <clickhouse.path>/disks/<NAME>/
not needed for type s3_plain
-->
<metadata_path>/var/lib/clickhouse/disks/disk_s3/</metadata_path>
<access_key_id>access_key_id</access_key_id>
<secret_access_key>secret_access_key</secret_access_key>
<server_side_encryption_customer_key_base64>server_side_encryption_customer_key_base64</server_side_encryption_customer_key_base64>
<use_environment_credentials>use_environment_credentials</use_environment_credentials>
<use_insecure_imds_request>use_insecure_imds_request</use_insecure_imds_request>
<support_batch_delete>true</support_batch_delete>
<support_proxy>true</support_proxy>
<proxy>
<!-- may specify multiple HTTP or HTTPS proxy uri's -->
<uri>http://proxy1.xyz/</uri>
<uri>http://proxy2.xyz/</uri>
<!--
At each interaction with S3, the resolver sends an empty GET request to the specified endpoint URL to obtain the proxy host.
The proxy host is returned as a string in the response body.
The S3 client then uses a proxy URL of the form proxy_scheme://proxy_host:proxy_port to make the request.
-->
<resolver>
<endpoint></endpoint>
<!-- HTTP or HTTPS -->
<proxy_scheme>https</proxy_scheme>
<proxy_port>8030</proxy_port>
<!-- cache TTL in seconds -->
<proxy_cache_time>10</proxy_cache_time>
</resolver>
</proxy>
<connect_timeout_ms>10000</connect_timeout_ms>
<request_timeout_ms>30000</request_timeout_ms>
<max_connections>100</max_connections>
<!-- 1024*1024 -->
<min_bytes_for_seek>1048576</min_bytes_for_seek>
<list_object_keys_size>1000</list_object_keys_size>
<objects_chunk_size_to_delete>1000</objects_chunk_size_to_delete>
<retry_attempts>10</retry_attempts>
<send_metadata>false</send_metadata>
<thread_pool_size>16</thread_pool_size>
<!--
This setting only applies to s3; other object storage types either do not support it or interpret it differently.
Only STANDARD and INTELLIGENT_TIERING are supported.
-->
<s3_storage_class>STANDARD</s3_storage_class>
</disk_s3>
<!-- s3_plain disk -->
<disk_s3_plain>
<type>s3_plain</type>
<!-- all other settings are the same as for disk_s3, except that <metadata_path> is not needed -->
</disk_s3_plain>
<!-- cache disk. mostly for caching data in object storage disks -->
<disk_cache>
<type>cache</type>
<disk>disk_s3</disk>
<!-- path to store cache data. required -->
<path>/var/lib/clickhouse/disks/disk_s3/cache/</path>
<!-- mandatory -->
<max_size>1073741824</max_size>
<max_elements>1048576</max_elements>
<max_file_segment_size>104857600</max_file_segment_size>
<cache_on_write_operations>false</cache_on_write_operations>
<enable_filesystem_query_cache_limit>false</enable_filesystem_query_cache_limit>
<enable_cache_hits_threshold>false</enable_cache_hits_threshold>
<enable_bypass_cache_with_threashold>false</enable_bypass_cache_with_threashold>
<bypass_cache_threashold>268435456</bypass_cache_threashold>
<do_not_evict_index_and_mark_files>false</do_not_evict_index_and_mark_files>
</disk_cache>
<!-- web disk -->
<disk_web>
<type>web</type>
<!-- endpoint URI must end with '/'. Can contain macros. -->
<endpoint>http://endpoint.com/uri/</endpoint>
<!-- skip access check (write some content to a random file and read it back), for readonly disks -->
<skip_access_check>false</skip_access_check>
</disk_web>
<disk_azure_blob>
<type>azure_blob_storage</type>
<storage_account_url>http://storage/account/url/</storage_account_url>
<container_name>default-container</container_name>
<container_already_exists>false</container_already_exists>
<connection_string>connection_string</connection_string>
<!-- Or -->
<account_key>account_key</account_key>
<account_name>account_name</account_name>
<!-- path to store metadata. defaults to <clickhouse.path>/disks/<NAME>/ -->
<metadata_path>/var/lib/clickhouse/disks/disk_azure_blob/</metadata_path>
<!-- storage settings -->
<max_single_part_upload_size>104857600</max_single_part_upload_size>
<min_bytes_for_seek>1048576</min_bytes_for_seek>
<max_single_read_retries>3</max_single_read_retries>
<max_single_download_retries>3</max_single_download_retries>
<list_object_keys_size>100</list_object_keys_size>
<thread_pool_size>16</thread_pool_size>
<send_metadata>false</send_metadata>
<!-- skip access check (write some content to a random file and read it back), for readonly disks -->
<skip_access_check>false</skip_access_check>
</disk_azure_blob>
<disk_hdfs>
<type>hdfs</type>
<!-- endpoint URI must end with '/'. Can contain macros. -->
<endpoint>hdfs://endpoint.com/uri/</endpoint>
<!-- path to store metadata. defaults to <clickhouse.path>/disks/<NAME>/ -->
<metadata_path>/var/lib/clickhouse/disks/disk_hdfs/</metadata_path>
<!-- 1024*1024 -->
<min_bytes_for_seek>1048576</min_bytes_for_seek>
<objects_chunk_size_to_delete>1000</objects_chunk_size_to_delete>
<thread_pool_size>16</thread_pool_size>
<!-- skip access check (write some content to a random file and read it back), for readonly disks -->
<skip_access_check>false</skip_access_check>
</disk_hdfs>
</disks>
<policies>
<all>
<!--
The order of volume enumeration within a storage policy is *important*. Once a volume is overfilled,
data are moved to the next one. The order of disk enumeration is *important* as well because data are
stored on them in turns.
-->
<volumes>
<main>
<!-- disk name referring to storage_configuration.disks.<NAME>, e.g. "disk_local" -->
<disk>disk_local</disk>
<disk>disk_s3</disk>
<!-- only JBOD -->
<raid_type>JBOD</raid_type>
<max_data_part_size_bytes>0</max_data_part_size_bytes>
<!-- Or -->
<max_data_part_size_ratio>0.0</max_data_part_size_ratio>
<perform_ttl_move_on_insert>1</perform_ttl_move_on_insert>
<prefer_not_to_merge>0</prefer_not_to_merge>
<!-- round_robin or least_used -->
<load_balancing>round_robin</load_balancing>
</main>
</volumes>
<!--
when the amount of available space gets lower than this factor, data automatically starts to move to
the next volume, if any. defaults to 0.1
-->
<move_factor>0.1</move_factor>
</all>
</policies>
</storage_configuration>
<local_disk_check_period_ms>10000</local_disk_check_period_ms>
<merge_tree>
<storage_policy>all</storage_policy>
<allow_remote_fs_zero_copy_replication>false</allow_remote_fs_zero_copy_replication>
</merge_tree>
<!-- network config that also applies to storages over network -->
<remote_url_allow_hosts>
<host_regexp></host_regexp>
<!-- Or -->
<host></host>
</remote_url_allow_hosts>
<connect_timeout_ms></connect_timeout_ms>
<request_timeout_ms></request_timeout_ms>
<max_connections></max_connections>
<s3>
<!-- s3 settings (without 's3_' prefix) -->
</s3>
<hdfs>
<!-- hdfs settings -->
<hadoop_kerberos_keytab>/tmp/keytab/clickhouse.keytab</hadoop_kerberos_keytab>
<hadoop_kerberos_principal>root@TEST.CLICKHOUSE.TECH</hadoop_kerberos_principal>
<hadoop_security_authentication>kerberos</hadoop_security_authentication>
<!-- etc.-->
</hdfs>
</clickhouse>
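A minimal usage sketch, assuming the configuration above has been loaded (the table and column names below are made up): a MergeTree table is pointed at the `all` policy via the `storage_policy` setting, and the loaded disks and policies can be inspected through system tables.

```sql
-- Hypothetical table; 'all' refers to the policy defined in <policies> above.
CREATE TABLE events
(
    d  Date,
    id UInt64
)
ENGINE = MergeTree
ORDER BY id
SETTINGS storage_policy = 'all';

-- Inspect what the server actually loaded from <storage_configuration>:
SELECT name, path, free_space, total_space FROM system.disks;
SELECT policy_name, volume_name, disks, move_factor FROM system.storage_policies;
```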
| Setting | Type | Default | Description | Note |
|---|---|---|---|---|
| hdfs_replication | UInt64 | 0 | The actual number of replications can be specified when the hdfs file is created. | |
| hdfs_truncate_on_insert | Bool | false | Enables or disables truncate before insert in hdfs engine tables. | |
| hdfs_create_new_file_on_insert | Bool | false | Enables or disables creating a new file on each insert in hdfs engine tables. | |
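As a hedged illustration of the hdfs_* settings above (the namenode address and file path are hypothetical):

```sql
-- Hypothetical namenode address and file path.
CREATE TABLE hdfs_events (d Date, id UInt64)
ENGINE = HDFS('hdfs://namenode:9000/clickhouse/events.tsv', 'TSV');

-- Overwrite the target file instead of raising an error when it already exists.
INSERT INTO hdfs_events SETTINGS hdfs_truncate_on_insert = 1 VALUES ('2024-01-01', 1);
```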
To regenerate the table below, search the ClickHouse settings with prefix `s3_` and replace
`^\s+M\(([^,]+), ([^,]+), ([^,]+), "([^"]+)", ([^,]+)\)`
with `| $2 | $1 | $3 | $4 |`.
| Setting | Type | Default | Description | Note |
|---|---|---|---|---|
| s3_min_upload_part_size | UInt64 | 16 * 1024 * 1024 | The minimum size of part to upload during multipart upload to S3. | |
| s3_max_upload_part_size | UInt64 | 5ull * 1024 * 1024 * 1024 | The maximum size of part to upload during multipart upload to S3. | |
| s3_upload_part_size_multiply_factor | UInt64 | 2 | Multiply s3_min_upload_part_size by this factor each time s3_multiply_parts_count_threshold parts were uploaded from a single write to S3. | |
| s3_upload_part_size_multiply_parts_count_threshold | UInt64 | 500 | Each time this number of parts has been uploaded to S3, s3_min_upload_part_size is multiplied by s3_upload_part_size_multiply_factor. | |
| s3_max_single_part_upload_size | UInt64 | 32 * 1024 * 1024 | The maximum size of object to upload using singlepart upload to S3. | |
| s3_max_single_read_retries | UInt64 | 4 | The maximum number of retries during single S3 read. | request setting |
| s3_max_unexpected_write_error_retries | UInt64 | 4 | The maximum number of retries in case of unexpected errors during S3 write. | |
| s3_max_redirects | UInt64 | 10 | Max number of S3 redirect hops allowed. | |
| s3_max_connections | UInt64 | 1024 | The maximum number of connections per server. | request setting |
| s3_max_get_rps | UInt64 | 0 | Limit on S3 GET requests per second before throttling. Zero means unlimited. | request setting |
| s3_max_get_burst | UInt64 | 0 | Max number of requests that can be issued simultaneously before hitting the requests-per-second limit. By default (0) equals s3_max_get_rps. | request setting |
| s3_max_put_rps | UInt64 | 0 | Limit on S3 PUT requests per second before throttling. Zero means unlimited. | request setting |
| s3_max_put_burst | UInt64 | 0 | Max number of requests that can be issued simultaneously before hitting the requests-per-second limit. By default (0) equals s3_max_put_rps. | request setting |
| s3_list_object_keys_size | UInt64 | 1000 | Maximum number of files that could be returned in batch by a ListObjects request. | request setting |
| s3_truncate_on_insert | Bool | false | Enables or disables truncate before insert in s3 engine tables. | |
| s3_create_new_file_on_insert | Bool | false | Enables or disables creating a new file on each insert in s3 engine tables. | |
| s3_check_objects_after_upload | Bool | false | Check each uploaded object to s3 with a HEAD request to be sure that the upload was successful. | request setting |
| s3_allow_parallel_part_upload | Bool | true | Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage. | |
| s3_throw_on_zero_files_match | Bool | false | Throw an error when a ListObjects request cannot match any files. | request setting |
| enable_s3_requests_logging | Bool | false | Enable very explicit logging of S3 requests. Makes sense for debug only. | |
Request settings can be specified in the config under clickhouse/storage_configuration/disks, under clickhouse/s3, or as context settings.
When specified under clickhouse/s3, strip the s3_ prefix.
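As a hedged example of the context-settings route (the bucket URL is hypothetical), the same s3_* names from the table above can be attached to an individual query:

```sql
-- Per-query (context) settings; they override the server-wide defaults for this query only.
SELECT count()
FROM s3('https://my-bucket.s3.amazonaws.com/data/*.parquet', 'Parquet')
SETTINGS s3_max_single_read_retries = 8, s3_list_object_keys_size = 500;
```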