Skip to content

Instantly share code, notes, and snippets.

@Slach
Last active January 22, 2018 03:31
Show Gist options
  • Save Slach/267c8981f97536397d8c7885ab20df7c to your computer and use it in GitHub Desktop.
Save Slach/267c8981f97536397d8c7885ab20df7c to your computer and use it in GitHub Desktop.
Error "Table structure in ZooKeeper is too much different from local table structure." when trying to create a ReplicatedMergeTree table
# Log files
*.log
# Vagrant's per-machine state directory (the project ships a Vagrantfile)
.vagrant/
# IDE project directory (presumably JetBrains; confirm)
.idea/
# Compiled executables and shared libraries
*.exe
*.dll
*.so
*.dylib
# Packet capture files
*.pcap
# File named like an SSH private key; must never be committed
id_rsa
# Test binary, built with `go test -c`
*.test
# Output of the go coverage tool, specifically when used with LiteIDE
*.out
# Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736
.glide/
# Base ClickHouse server service. docker-compose.yml derives every replica
# from it via `extends` and overrides CLICKHOUSE_SHARD / CLICKHOUSE_REPLICA.
version: '2'
services:
  clickhouse.base:
    image: yandex/clickhouse-server
    volumes:
      - ./config.xml:/etc/clickhouse-server/config.xml
      - ./users.xml:/etc/clickhouse-server/users.xml
    environment:
      # Values substituted into the <layer>, <shard> and <replica> macros
      # by the entrypoint below. Quoted so they are always passed as strings.
      CLICKHOUSE_LAYER: '1'
      CLICKHOUSE_SHARD: '1'
      CLICKHOUSE_REPLICA: clickhouse.base
      CLICKHOUSE_CONFIG: /etc/clickhouse-server/config.xml
    # Writes conf.d/macros.xml from the environment, then starts the server.
    # `$$` escapes Compose interpolation so the container's shell expands the
    # variables at start-up instead of Compose expanding them at parse time.
    entrypoint: sh -c 'mkdir -p /etc/clickhouse-server/conf.d/ && echo "<?xml version=\"1.0\"?><yandex><macros><layer>$$CLICKHOUSE_LAYER</layer><shard>$$CLICKHOUSE_SHARD</shard><replica>$$CLICKHOUSE_REPLICA</replica></macros></yandex>" > /etc/clickhouse-server/conf.d/macros.xml && /usr/bin/clickhouse-server --config=$$CLICKHOUSE_CONFIG'
<?xml version="1.0"?>
<yandex>
<logger>
    <!-- Logging verbosity. -->
    <level>trace</level>
    <!-- Main server log and the separate error log. -->
    <log>/var/log/clickhouse-server/clickhouse-server.log</log>
    <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
    <!-- Log file size and file count (presumably rotation limits; confirm against the server documentation). -->
    <size>100M</size>
    <count>1</count>
</logger>
<http_port>8123</http_port>
<!--
<https_port>8443</https_port>
-->
<!-- Used only with https_port. Full ssl options list: https://github.com/yandex/ClickHouse/blob/master/contrib/libpoco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h#L71 -->
<openSSL>
    <server>
        <!-- Certificate and key can be generated with:
             openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt -->
        <certificateFile>/etc/clickhouse-server/server.crt</certificateFile>
        <privateKeyFile>/etc/clickhouse-server/server.key</privateKeyFile>
        <!-- DH parameters can be generated with:
             openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096 -->
        <dhParamsFile>/etc/clickhouse-server/dhparam.pem</dhParamsFile>
        <verificationMode>none</verificationMode>
        <loadDefaultCAFile>true</loadDefaultCAFile>
        <cacheSessions>true</cacheSessions>
        <disableProtocols>sslv2,sslv3</disableProtocols>
        <preferServerCiphers>true</preferServerCiphers>
    </server>
    <client>
        <loadDefaultCAFile>true</loadDefaultCAFile>
        <cacheSessions>true</cacheSessions>
        <disableProtocols>sslv2,sslv3</disableProtocols>
        <preferServerCiphers>true</preferServerCiphers>
        <!-- For self-signed certificates add: <verificationMode>none</verificationMode> -->
        <invalidCertificateHandler>
            <!-- For self-signed certificates use: <name>AcceptCertificateHandler</name> -->
            <name>RejectCertificateHandler</name>
        </invalidCertificateHandler>
    </client>
</openSSL>
<!-- Default root page on http[s] server. For example load UI from https://tabix.io/ when opening http://localhost:8123 -->
<!--
<http_server_default_response><![CDATA[<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>]]></http_server_default_response>
-->
<tcp_port>9000</tcp_port>
<!-- Port for communication between replicas. Used for data exchange. -->
<interserver_http_port>9009</interserver_http_port>
<!-- Hostname that is used by other replicas to request this server.
If not specified, then it is determined analogously to the 'hostname -f' command.
This setting could be used to switch replication to another network interface.
-->
<!--
<interserver_http_host>example.yandex.ru</interserver_http_host>
-->
<!-- Listen specified host. use :: (wildcard IPv6 address), if you want to accept connections both with IPv4 and IPv6 from everywhere. -->
<!-- <listen_host>::</listen_host> -->
<listen_host>0.0.0.0</listen_host>
<max_connections>4096</max_connections>
<keep_alive_timeout>3</keep_alive_timeout>
<!-- Maximum number of concurrent queries. -->
<max_concurrent_queries>100</max_concurrent_queries>
<!-- Set limit on number of open files (default: maximum). This setting makes sense on Mac OS X because getrlimit() fails to retrieve
correct maximum value. -->
<!-- <max_open_files>262144</max_open_files> -->
<!-- Size of cache of uncompressed blocks of data, used in tables of MergeTree family.
In bytes. Cache is single for server. Memory is allocated only on demand.
Cache is used when 'use_uncompressed_cache' user setting turned on (off by default).
Uncompressed cache is advantageous only for very short queries and in rare cases.
-->
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
<!-- Approximate size of mark cache, used in tables of MergeTree family.
In bytes. Cache is single for server. Memory is allocated only on demand.
You should not lower this value.
-->
<mark_cache_size>5368709120</mark_cache_size>
<!-- Path to data directory, with trailing slash. -->
<path>/var/lib/clickhouse/</path>
<!-- Path to temporary data for processing hard queries. -->
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
<!-- Path to configuration file with users, access rights, profiles of settings, quotas. -->
<users_config>users.xml</users_config>
<!-- Default profile of settings. -->
<default_profile>default</default_profile>
<!-- Default database. -->
<default_database>default</default_database>
<!-- Server time zone could be set here.
Time zone is used when converting between String and DateTime types,
when printing DateTime in text formats and parsing DateTime from text,
it is used in date and time related functions, if specific time zone was not passed as an argument.
Time zone is specified as identifier from IANA time zone database, like UTC or Africa/Abidjan.
If not specified, system time zone at server startup is used.
Please note, that server could display time zone alias instead of specified name.
Example: W-SU is an alias for Europe/Moscow and Zulu is an alias for UTC.
-->
<!-- <timezone>Europe/Moscow</timezone> -->
<!-- You can specify umask here (see "man umask"). Server will apply it on startup.
Number is always parsed as octal. Default umask is 027 (other users cannot read logs, data files, etc; group can only read).
-->
<!-- <umask>022</umask> -->
<!-- Configuration of clusters that could be used in Distributed tables.
https://clickhouse.yandex/reference_en.html#Distributed -->
<remote_servers>
    <!-- Cluster of three shards with two replicas each.
         The replica hosts are the service names declared in docker-compose.yml. -->
    <metrika2clickhouse>
        <shard>
            <weight>1</weight>
            <!-- true: a Distributed table writes each insert to one replica only and the
                 ReplicatedMergeTree tables copy it to the other replica. With false the
                 Distributed table itself sends every insert to all replicas, which is
                 meant for non-replicated tables. -->
            <internal_replication>true</internal_replication>
            <replica>
                <host>clickhouse-ru-1.local</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>clickhouse-ru-2.local</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <weight>1</weight>
            <internal_replication>true</internal_replication>
            <replica>
                <host>clickhouse-eu-1.local</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>clickhouse-eu-2.local</host>
                <port>9000</port>
            </replica>
        </shard>
        <shard>
            <weight>1</weight>
            <internal_replication>true</internal_replication>
            <replica>
                <host>clickhouse-us-1.local</host>
                <port>9000</port>
            </replica>
            <replica>
                <host>clickhouse-us-2.local</host>
                <port>9000</port>
            </replica>
        </shard>
    </metrika2clickhouse>
</remote_servers>
<!-- If an element has the 'incl' attribute, the corresponding substitution from another file is used as its value.
By default, the path to the file with substitutions is /etc/metrika.xml. It can be changed in the config with the 'include_from' element.
Values for substitutions are specified in /yandex/name_of_substitution elements in that file.
-->
<!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables.
Optional. If you don't use replicated tables, you could omit that.
See https://clickhouse.yandex/reference_en.html#Data%20replication
-->
<zookeeper>
    <!-- Single ZooKeeper node; "zookeeper" is the service name declared in docker-compose.yml. -->
    <node index="1">
        <host>zookeeper</host>
        <port>2181</port>
    </node>
</zookeeper>
<!-- Substitutions for parameters of replicated tables.
Optional. If you don't use replicated tables, you could omit that.
See https://clickhouse.yandex/reference_en.html#Creating%20replicated%20tables
-->
<macros incl="macros" optional="true"/>
<!-- Reloading interval for embedded dictionaries, in seconds. Default: 3600. -->
<builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval>
<!-- Maximum session timeout, in seconds. Default: 3600. -->
<max_session_timeout>3600</max_session_timeout>
<!-- Default session timeout, in seconds. Default: 60. -->
<default_session_timeout>60</default_session_timeout>
<!-- Sending data to Graphite for monitoring. Several sections can be defined. -->
<!--
interval - send every X second
root_path - prefix for keys
metrics - send data from table system.metrics
events - send data from table system.events
asynchronous_metrics - send data from table system.asynchronous_metrics
-->
<use_graphite>false</use_graphite>
<!--
<graphite>
<host>localhost</host>
<port>42000</port>
<timeout>0.1</timeout>
<interval>60</interval>
<root_path>one_min</root_path>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
</graphite>
<graphite>
<host>localhost</host>
<port>42000</port>
<timeout>0.1</timeout>
<interval>1</interval>
<root_path>one_sec</root_path>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>false</asynchronous_metrics>
</graphite>
-->
<!-- Query log. Used only for queries with setting log_queries = 1. -->
<query_log>
    <!-- Table that receives the log records. If the table does not exist, it will be created.
         When the query log structure changes after a system update,
         the old table is renamed and a new table is created automatically.
    -->
    <database>system</database>
    <table>query_log</table>
    <!-- Interval between flushes of the collected records, in milliseconds. -->
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
<!-- Uncomment to use part_log
<part_log>
<database>system</database>
<table>part_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</part_log>
-->
<!-- Parameters for embedded dictionaries, used in Yandex.Metrica.
See https://clickhouse.yandex/reference_en.html#Internal%20dictionaries
-->
<!-- Path to file with region hierarchy. -->
<!-- <path_to_regions_hierarchy_file>/opt/geo/regions_hierarchy.txt</path_to_regions_hierarchy_file> -->
<!-- Path to directory with files containing names of regions -->
<!-- <path_to_regions_names_files>/opt/geo/</path_to_regions_names_files> -->
<!-- Configuration of external dictionaries. See:
https://clickhouse.yandex/reference_en.html#External%20Dictionaries
-->
<dictionaries_config>*_dictionary.xml</dictionaries_config>
<!-- Custom compression settings (enabled here); they can compress data 30-100% better.
Don't do that if you just started using ClickHouse.
-->
<compression incl="clickhouse_compression">
<!-- Set of variants. Checked in order. Last matching case wins. If nothing matches, lz4 will be used. -->
<case>
<!-- Conditions. All must be satisfied. Some conditions may be omitted. -->
<min_part_size>10000000000</min_part_size> <!-- Min part size in bytes. -->
<min_part_size_ratio>0.01</min_part_size_ratio> <!-- Min size of part relative to whole table size. -->
<!-- What compression method to use. -->
<method>zstd</method> <!-- Keep in mind that zstd compression library is highly experimental. -->
</case>
</compression>
<resharding>
    <!-- Task queue location (presumably a ZooKeeper path, like the DDL queue below; confirm). -->
    <task_queue_path>/clickhouse/task_queue</task_queue_path>
</resharding>

<!-- Enables distributed DDL queries (CREATE, DROP, ALTER, RENAME) on a cluster.
     Requires ZooKeeper. Comment this section out if the feature is not needed. -->
<distributed_ddl>
    <!-- ZooKeeper path of the queue that holds the DDL queries. -->
    <path>/clickhouse/task_queue/ddl</path>
</distributed_ddl>
<!-- Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->
<!--
<merge_tree>
<max_suspicious_broken_parts>5</max_suspicious_broken_parts>
</merge_tree>
-->
<!-- Protection from accidental DROP.
If the size of a MergeTree table is greater than max_table_size_to_drop (in bytes), then the table cannot be dropped with any DROP query.
If you want to delete one table and don't want to restart clickhouse-server, you could create the special file <clickhouse-path>/flags/force_drop_table and run DROP once.
By default max_table_size_to_drop is 50GB; max_table_size_to_drop=0 allows dropping any table.
Uncomment to disable protection.
-->
<!-- <max_table_size_to_drop>0</max_table_size_to_drop> -->
<!-- Example of parameters for GraphiteMergeTree table engine -->
<graphite_rollup_example>
    <!-- Rule for metric names matching the regexp. -->
    <pattern>
        <regexp>click_cost</regexp>
        <function>any</function>
        <retention>
            <age>0</age>
            <precision>3600</precision>
        </retention>
        <retention>
            <age>86400</age>
            <precision>60</precision>
        </retention>
    </pattern>
    <!-- Rule named "default" (presumably applied when no pattern matches; confirm). -->
    <default>
        <function>max</function>
        <retention>
            <age>0</age>
            <precision>60</precision>
        </retention>
        <retention>
            <age>3600</age>
            <precision>300</precision>
        </retention>
        <retention>
            <age>86400</age>
            <precision>3600</precision>
        </retention>
    </default>
</graphite_rollup_example>
</yandex>
-- Clicks table; the same statement is executed on every replica.
-- {layer}, {shard} and {replica} are replaced by the macros of the server
-- that executes the statement.
CREATE TABLE IF NOT EXISTS default.appmetrika_clicks_replicated
(
    app_id              UInt64 DEFAULT 0,
    load_time           DateTime DEFAULT now(),
    click_url_parameters String,
    os_version          String,
    android_id          String,
    ios_ifv             String,
    ios_ifa             String,
    publisher_name      String,
    publisher_id        UInt64,
    tracker_name        String,
    click_id            String,
    device_manufacturer String,
    google_aid          String,
    tracking_id         UInt64,
    click_timestamp     UInt64,
    click_datetime      DateTime,
    device_type         String,
    device_model        String,
    windows_aid         String,
    click_ipv6          String,
    click_date          MATERIALIZED toDate(click_datetime),
    os_name             String,
    click_user_agent    String,
    country_iso_code    FixedString(3),
    city                String
)
ENGINE = ReplicatedMergeTree(
    '/clickhouse/tables/{layer}-{shard}/appmetrika_clicks_replicated',  -- table path in ZooKeeper
    '{replica}',                                                        -- replica name
    click_date,                                                         -- date column
    click_ipv6,                                                         -- sampling expression
    (app_id, publisher_id, click_ipv6, tracking_id),                    -- primary key
    8192                                                                -- index granularity
)
# ZooKeeper, six ClickHouse replicas (three shards of two) and a client.
# Every replica extends clickhouse.base from clickhouse-compose.yml and only
# overrides the shard number and the replica name used for the macros.
version: '2'
services:
  zookeeper:
    image: zookeeper

  # Shard 1
  clickhouse-ru-1.local:
    extends:
      file: clickhouse-compose.yml
      service: clickhouse.base
    environment:
      CLICKHOUSE_SHARD: '1'
      CLICKHOUSE_REPLICA: clickhouse-ru-1.local
    depends_on:
      - zookeeper
    links:
      - zookeeper

  clickhouse-ru-2.local:
    extends:
      file: clickhouse-compose.yml
      service: clickhouse.base
    environment:
      CLICKHOUSE_SHARD: '1'
      CLICKHOUSE_REPLICA: clickhouse-ru-2.local
    depends_on:
      - zookeeper
    links:
      - zookeeper

  # Shard 2
  clickhouse-eu-1.local:
    extends:
      file: clickhouse-compose.yml
      service: clickhouse.base
    environment:
      CLICKHOUSE_SHARD: '2'
      CLICKHOUSE_REPLICA: clickhouse-eu-1.local
    depends_on:
      - zookeeper
    links:
      - zookeeper

  clickhouse-eu-2.local:
    extends:
      file: clickhouse-compose.yml
      service: clickhouse.base
    environment:
      CLICKHOUSE_SHARD: '2'
      CLICKHOUSE_REPLICA: clickhouse-eu-2.local
    depends_on:
      - zookeeper
    links:
      - zookeeper

  # Shard 3
  clickhouse-us-1.local:
    extends:
      file: clickhouse-compose.yml
      service: clickhouse.base
    environment:
      CLICKHOUSE_SHARD: '3'
      CLICKHOUSE_REPLICA: clickhouse-us-1.local
    depends_on:
      - zookeeper
    links:
      - zookeeper

  clickhouse-us-2.local:
    extends:
      file: clickhouse-compose.yml
      service: clickhouse.base
    environment:
      CLICKHOUSE_SHARD: '3'
      CLICKHOUSE_REPLICA: clickhouse-us-2.local
    depends_on:
      - zookeeper
    links:
      - zookeeper

  # Client container; depends on every server so that `docker-compose run`
  # on this service brings the whole cluster up first.
  clickhouse-client.local:
    image: yandex/clickhouse-client
    depends_on:
      - zookeeper
      - clickhouse-ru-1.local
      - clickhouse-ru-2.local
      - clickhouse-eu-1.local
      - clickhouse-eu-2.local
      - clickhouse-us-1.local
      - clickhouse-us-2.local
    links:
      - zookeeper
      - clickhouse-ru-1.local
      - clickhouse-ru-2.local
      - clickhouse-eu-1.local
      - clickhouse-eu-2.local
      - clickhouse-us-1.local
      - clickhouse-us-2.local
<?xml version="1.0"?>
<yandex>
<!-- Profiles of settings. -->
<profiles>
    <!-- Default settings. -->
    <default>
        <!-- Memory limits, in bytes (2G): per query, per user and for all queries together. -->
        <max_memory_usage>2000000000</max_memory_usage>
        <max_memory_usage_for_user>2000000000</max_memory_usage_for_user>
        <max_memory_usage_for_all_queries>2000000000</max_memory_usage_for_all_queries>

        <background_pool_size>2</background_pool_size>
        <!--
        <min_insert_block_size_bytes>67108864</min_insert_block_size_bytes>
        <max_insert_block_size>524288</max_insert_block_size>
        <min_insert_block_size_rows>10000</min_insert_block_size_rows>
        -->

        <!-- Use the cache of uncompressed data blocks. Meaningful only when processing many very short queries. -->
        <use_uncompressed_cache>1</use_uncompressed_cache>

        <!-- How to choose between replicas during distributed query processing.
             random - choose a random replica from the set of replicas with the minimum number of errors
             nearest_hostname - from the set of replicas with the minimum number of errors, choose the replica
              with the minimum number of different symbols between the replica's hostname and the local hostname
              (Hamming distance).
             in_order - the first live replica is chosen in the specified order.
        -->
        <load_balancing>random</load_balancing>
    </default>

    <!-- Profile that allows only read queries. -->
    <readonly>
        <readonly>1</readonly>
    </readonly>
</profiles>
<!-- Users and ACL. -->
<users>
    <!-- The 'default' user is used when no user name is specified. -->
    <default>
        <!-- The password can be specified in plaintext or as SHA256 (in hex format).

             To specify the password in plaintext (not recommended), place it in the 'password' element.
             Example: <password>qwerty</password>.
             The password may be empty.

             To specify SHA256, place it in the 'password_sha256_hex' element.
             Example: <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex>

             How to generate a decent password:
             Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-'
             The first line is the password, the second line is the corresponding SHA256.
        -->
        <password></password>

        <!-- List of networks with open access.

             To open access from everywhere, specify:
                <ip>::/0</ip>

             To open access only from localhost, specify:
                <ip>::1</ip>
                <ip>127.0.0.1</ip>

             Each element of the list has one of the following forms:
             <ip> IP address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 2a02:6b8::3 or 2a02:6b8::3/64.
             <host> Hostname. Example: server01.yandex.ru.
                 To check access, a DNS query is performed, and all received addresses are compared to the peer address.
             <host_regexp> Regular expression for host names. Example: ^server\d\d-\d\d-\d\.yandex\.ru$
                 To check access, a DNS PTR query is performed for the peer address and then the regexp is applied.
                 Then, for the result of the PTR query, another DNS query is performed and all received addresses are compared to the peer address.
                 It is strongly recommended that the regexp ends with $
             All results of DNS requests are cached until server restart.
        -->
        <networks incl="networks" replace="replace">
            <ip>::/0</ip>
        </networks>

        <!-- Settings profile for the user. -->
        <profile>default</profile>

        <!-- Quota for the user. -->
        <quota>default</quota>
    </default>

    <!-- Example of a user with readonly access, reachable from localhost only. -->
    <readonly>
        <password></password>
        <networks incl="networks" replace="replace">
            <ip>::1</ip>
            <ip>127.0.0.1</ip>
        </networks>
        <profile>readonly</profile>
        <quota>default</quota>
    </readonly>
</users>
<!-- Quotas. -->
<quotas>
    <!-- Quota name. -->
    <default>
        <!-- Limits for a time interval. Several intervals with different limits may be specified. -->
        <interval>
            <!-- Length of the interval. -->
            <duration>3600</duration>

            <!-- No limits: resource usage is only accounted for the interval. -->
            <queries>0</queries>
            <errors>0</errors>
            <result_rows>0</result_rows>
            <read_rows>0</read_rows>
            <execution_time>0</execution_time>
        </interval>
    </default>
</quotas>
</yandex>
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Development VM: installs Docker and docker-compose, starts the ClickHouse
# cluster from /vagrant and runs create_table.sql against every replica.
Vagrant.configure(2) do |config|
  config.vm.box = "ubuntu/xenial64"
  config.vm.box_check_update = false

  # Settings in the config.hostmanager namespace (provided by a Vagrant plugin).
  config.hostmanager.enabled = true
  config.hostmanager.manage_host = true
  config.hostmanager.ignore_private_ip = false
  config.hostmanager.include_offline = false

  config.vm.provider "virtualbox" do |vb|
    vb.gui = false
    vb.memory = "2048"
    # Allow symlink creation inside the "vagrant" shared folder.
    vb.customize ["setextradata", :id, "VBoxInternal2/SharedFoldersEnableSymlinksCreate/vagrant", "1"]
  end

  config.vm.define :develop do |develop|
    develop.vm.host_name = "local-develop-clickhouse-pro"
    develop.hostmanager.aliases = ["local.develop.clickhouse.pro"]
    develop.vm.network "private_network", ip: "172.16.2.77"

    # NOTE: this heredoc is interpolated by Ruby. Keep backslashes and a hash
    # sign directly followed by a brace, dollar or at-sign out of the script.
    develop.vm.provision "shell", inline: <<-SHELL
      set -xeuo pipefail
      export DEBIAN_FRONTEND=noninteractive
      apt-get update
      apt-get install -y apt-transport-https software-properties-common aptitude

      # docker
      apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 8D81803C0EBFCD88
      add-apt-repository "deb https://download.docker.com/linux/ubuntu xenial edge"
      apt-get update
      apt-get install -y docker-ce

      apt-get install -y htop ethtool mc iotop
      apt-get install -y python-pip
      pip install -U pip
      pip install docker-compose

      cd /vagrant

      # Smoke test of the client image; running it also starts the services
      # the client depends on. The command is one quoted argument so that
      # bash -c receives "ls -la" rather than "ls" with -la as its $0.
      docker-compose run --rm --entrypoint="/bin/bash -c" clickhouse-client.local "ls -la"

      replicas="clickhouse-ru-1.local clickhouse-ru-2.local"
      replicas="$replicas clickhouse-eu-1.local clickhouse-eu-2.local"
      replicas="$replicas clickhouse-us-1.local clickhouse-us-2.local"

      for host in $replicas; do
        # Wait until the server answers a trivial query (at most 30 attempts)
        # instead of relying on a fixed sleep.
        attempts=0
        until docker-compose run --rm clickhouse-client.local -h "$host" --query "SELECT 1" < /dev/null; do
          attempts=$((attempts + 1))
          if [ "$attempts" -ge 30 ]; then
            echo "ClickHouse at $host did not become ready" >&2
            exit 1
          fi
          sleep 2
        done

        # Create the replicated table on this replica; --rm removes the
        # one-off client container afterwards.
        docker-compose run --rm clickhouse-client.local -h "$host" < create_table.sql
      done
    SHELL
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment