# Download the gzipped NYC taxi vectors dataset from the public S3 bucket.
wget https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/nyc-taxi-vectors.csv.gz
# Decompress in place; gzip -d removes the .gz and leaves nyc-taxi-vectors.csv.
gzip -d nyc-taxi-vectors.csv.gz
-- Data table for the NYC taxi vectors dataset (id + embedding + customer).
CREATE TABLE transactions
(
    id UInt32,              -- row identifier, also the sorting key below
    vector Array(Float32),  -- embedding vector loaded from the CSV
    customer UInt32         -- no trailing comma: portable across SQL dialects
)
ENGINE = MergeTree -- this can be a Null engine
ORDER BY id;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Resolve the ClickHouse Cloud organization to create a service in.
# Requires: CLOUD_ID / CLOUD_SECRET (API key) and AWS credentials.
if [[ -z "$CLOUD_ID" || -z "$CLOUD_SECRET" || -z "$AWS_ACCESS_KEY_ID" || -z "$AWS_SECRET_ACCESS_KEY" ]]; then
  echo "Error: Required environment variables are not set." >&2
  exit 1
fi

# Identify the organization to create the service in.
# Fetch the payload once and query it twice with jq, instead of hitting
# the API twice; quote the credentials to survive special characters.
ORG_JSON=$(curl --silent --user "$CLOUD_ID:$CLOUD_SECRET" https://api.clickhouse.cloud/v1/organizations)
ORG_ID=$(jq -r '.result[0].id' <<< "$ORG_JSON")
ORG_NAME=$(jq -r '.result[0].name' <<< "$ORG_JSON")
CREATE TABLE surveys
(
`response_id` Int64,
`development_activity` Enum8('I am a developer by profession' = 1, 'I am a student who is learning to code' = 2, 'I am not primarily a developer, but I write code sometimes as part of my work' = 3, 'I code primarily as a hobby' = 4, 'I used to be a developer by profession, but no longer am' = 5, 'None of these' = 6, 'NA' = 7),
`employment` Enum8('Independent contractor, freelancer, or self-employed' = 1, 'Student, full-time' = 2, 'Employed full-time' = 3, 'Student, part-time' = 4, 'I prefer not to say' = 5, 'Employed part-time' = 6, 'Not employed, but looking for work' = 7, 'Retired' = 8, 'Not employed, and not looking for work' = 9, 'NA' = 10),
`country` LowCardinality(String),
`us_state` LowCardinality(String),
`uk_county` LowCardinality(String),
`education_level` Enum8('Secondary school (e.g. American high school, German Realschule or Gymnasium, etc.)' = 1, 'Bachelor’s degree (B.A., B.S., B.Eng., etc.)' = 2, 'Master’s degree (M.
-- original table
CREATE TABLE hackernews_copy
(
`id` String,
`doc_id` String,
`comment` String,
`vector` Array(Float32),
`node_info` Tuple(start Nullable(UInt64), end Nullable(UInt64)),
`metadata` String,
# List contributors added between releases 23.12 and 24.1: diff the sorted
# system.contributors tables from both images, keep only added lines
# (grep "^\+"), drop the "+++" diff header (tail -n +2), strip the leading
# "+" (sed), and join the names into one comma-separated line (tr).
diff -u <(docker run --rm clickhouse/clickhouse-server:23.12 clickhouse-local --query "SELECT * FROM system.contributors ORDER BY name") <(docker run --rm clickhouse/clickhouse-server:24.1 clickhouse-local --query "SELECT * FROM system.contributors ORDER BY name") | grep -E "^\+" | tail -n +2 | sed 's/^\+//' | tr '\n' ','
The following provides the details required to reproduce the demo given on the Real-time SQL analytics at scale: A story of open-source GitHub activity using ClickHouse + Grafana webinar.
This provides a Grafana dashboard showing an analysis of the history of the Grafana repository, including common committers, most popular issues, largest commits and files with the longest history.
This dataset contains all of the commits and changes for the ClickHouse repository. It can be generated using the native git-import
tool distributed with ClickHouse.
The generated data provides a tsv file for each of the following tables:
- commits — commits with statistics;
- file_changes — files changed in every commit, with information about the change and statistics;
- line_changes — every changed line in every changed file in every commit, with full information about the line and about the previous change of this line.
CREATE EXTERNAL TABLE IF NOT EXISTS ookla (
quadkey string,
tile string,
avg_d_kbps int,
avg_u_kbps int,
avg_lat_ms int,
avg_lat_down_ms int,
avg_lat_up_ms int,
tests int,
NewerOlder