Skip to content

Instantly share code, notes, and snippets.

@MattJermyWright
Last active December 31, 2017 14:47
Show Gist options
  • Save MattJermyWright/4e091271f2687d82f34a to your computer and use it in GitHub Desktop.
Save MattJermyWright/4e091271f2687d82f34a to your computer and use it in GitHub Desktop.
Vertica commonly used SQL
# JDBC Connection String
library("RJDBC")
# Connect to driver
drv <- JDBC("java.sql.Driver",
"/Users/matt/Dropbox/code/jdbc/vertica-jdbc-7.1.1-0.jar",
identifier.quote="\"")
conn <- dbConnect(drv, "jdbc:vertica://VerticaHost:5433/databaseName", "user", "pwd")
-- Loads a standard CSV file / non-quoted from local system file
COPY public.table_name
FROM local 'filename.csv'
DELIMITER ','
-- RECORD TERMINATOR E'\r\n' -- This is the record terminator - use E for escape sequences
exceptions 'exceptions.txt' rejected data 'rejected.csv';
-- Loads a standard Tab-delimited file / non-quoted from local system file
COPY public.table_name
FROM local 'filename.csv'
DELIMITER E'\t'
-- RECORD TERMINATOR E'\r\n' -- This is the record terminator - use E for escape sequences
exceptions 'exceptions.txt' rejected data 'rejected.csv';
-- Random Sample - select distinct identifier, and then randomize it
-- Suitable for small queries (because you have to do a subselect and join
-- against this relation).
-- Remember that if you attempt to do this as a subselect, it will likely
-- favor any keys that have multiple records as they will have a greater
-- frequency to be in the final distribution
SELECT DISTINCT rm_num AS key_to_sample
FROM mkt_customer_master a
WHERE random() < .75
-- Use this to create a temporary tally table with sampled values, for
-- use when joining against tables in the future
DROP TABLE tallySample;
CREATE TABLE tallySample AS
SELECT rm_num,
Random() AS seed
FROM mkt_customer_master a
WHERE date_as_of_date = To_date('20140930', 'YYYYMMDD')
GROUP BY 1;
COMMIT;
-- Or using a temporary table if you're concerned about space
DROP TABLE IF EXISTS tallySample;
CREATE temporary TABLE tallySample
on commit preserve rows AS
SELECT rm_num,
random() AS seed
FROM mkt_customer_master a
GROUP BY 1;
-- Join using the tally table:
SELECT a.*
FROM mkt_customer_master a
INNER JOIN tallySample b
ON a.rm_num = b.rm_num
-- Use the seed parameter to select a percentage to sample
AND b.seed < .1
-- When using this as a subselect (if you choose not to make a temp table),
-- make certain that you join the random() function identifier
-- as a seed value in the query and then filter during the join.
-- The vertica query optimizer is really agressive and will distribute
-- the random function internally with extreme prejudice. :)
-- I'm illustrating this using the WITH clause, but it could easily be done
-- with a subselect.
WITH sample_customers
AS (SELECT z.*,
Random() AS seed
FROM (
-- Identify the unique key from which to randomize
SELECT DISTINCT rm_num
FROM mkt_customer_master
WHERE date_as_of_date = To_date('20140930', 'YYYYMMDD')) z)
-- Now on to the select
SELECT Count(*),
Count(DISTINCT c.rm_num) AS people
FROM (SELECT a.*
FROM sample_customers b
INNER JOIN mkt_customer_master a
ON b.rm_num = a.rm_num
-- Use the seed value to filter / collect sample percentage
AND b.seed < .1) c
# -*- mode: ruby -*-
# vi: set ft=ruby :
# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
VAGRANTFILE_API_VERSION = "2"
Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
# All Vagrant configuration is done here. The most common configuration
# options are documented and commented below. For a complete reference,
# please see the online documentation at vagrantup.com.
# Every Vagrant virtual environment requires a box to build off of.
config.vm.box = "ubuntu/precise64"
config.vm.provider :virtualbox do |vb|
vb.memory = 4096
vb.cpus = 4
file_to_disk = 'large_disk.vdi'
swap = 'swap.vdi'
# 200 GB drive + 8GB Drive
vb.customize ['createhd', '--filename', file_to_disk, '--size', 200 * 1024]
vb.customize ['storageattach', :id, '--storagectl', 'SATAController', '--port', 1, '--device', 0, '--type', 'hdd', '--medium', file_to_disk]
vb.customize ['createhd', '--filename', swap, '--size', 8 * 1024]
vb.customize ['storageattach', :id, '--storagectl', 'SATAController', '--port', 2, '--device', 0, '--type', 'hdd', '--medium', swap]
end
config.vm.provision "shell", path: "Vagrantfile-postInstall.sh"
# Disable automatic box update checking. If you disable this, then
# boxes will only be checked for updates when the user runs
# `vagrant box outdated`. This is not recommended.
# config.vm.box_check_update = false
# Create a forwarded port mapping which allows access to a specific port
# within the machine from a port on the host machine. In the example below,
# accessing "localhost:8080" will access port 80 on the guest machine.
# config.vm.network "forwarded_port", guest: 80, host: 8080
# Create a private network, which allows host-only access to the machine
# using a specific IP.
# config.vm.network "private_network", ip: "192.168.33.10"
# Create a public network, which generally matched to bridged network.
# Bridged networks make the machine appear as another physical device on
# your network.
# config.vm.network "public_network"
# If true, then any SSH connections made will enable agent forwarding.
# Default value: false
# config.ssh.forward_agent = true
# Share an additional folder to the guest VM. The first argument is
# the path on the host to the actual folder. The second argument is
# the path on the guest to mount the folder. And the optional third
# argument is a set of non-required options.
# config.vm.synced_folder "../data", "/vagrant_data"
# Provider-specific configuration so you can fine-tune various
# backing providers for Vagrant. These expose provider-specific options.
# Example for VirtualBox:
#
# config.vm.provider "virtualbox" do |vb|
# # Don't boot with headless mode
# vb.gui = true
#
# # Use VBoxManage to customize the VM. For example to change memory:
# vb.customize ["modifyvm", :id, "--memory", "1024"]
# end
#
# View the documentation for the provider you're using for more
# information on available options.
# Enable provisioning with CFEngine. CFEngine Community packages are
# automatically installed. For example, configure the host as a
# policy server and optionally a policy file to run:
#
# config.vm.provision "cfengine" do |cf|
# cf.am_policy_hub = true
# # cf.run_file = "motd.cf"
# end
#
# You can also configure and bootstrap a client to an existing
# policy server:
#
# config.vm.provision "cfengine" do |cf|
# cf.policy_server_address = "10.0.2.15"
# end
# Enable provisioning with Puppet stand alone. Puppet manifests
# are contained in a directory path relative to this Vagrantfile.
# You will need to create the manifests directory and a manifest in
# the file default.pp in the manifests_path directory.
#
# config.vm.provision "puppet" do |puppet|
# puppet.manifests_path = "manifests"
# puppet.manifest_file = "default.pp"
# end
# Enable provisioning with chef solo, specifying a cookbooks path, roles
# path, and data_bags path (all relative to this Vagrantfile), and adding
# some recipes and/or roles.
#
# config.vm.provision "chef_solo" do |chef|
# chef.cookbooks_path = "../my-recipes/cookbooks"
# chef.roles_path = "../my-recipes/roles"
# chef.data_bags_path = "../my-recipes/data_bags"
# chef.add_recipe "mysql"
# chef.add_role "web"
#
# # You may also specify custom JSON attributes:
# chef.json = { mysql_password: "foo" }
# end
# Enable provisioning with chef server, specifying the chef server URL,
# and the path to the validation key (relative to this Vagrantfile).
#
# The Opscode Platform uses HTTPS. Substitute your organization for
# ORGNAME in the URL and validation key.
#
# If you have your own Chef Server, use the appropriate URL, which may be
# HTTP instead of HTTPS depending on your configuration. Also change the
# validation key to validation.pem.
#
# config.vm.provision "chef_client" do |chef|
# chef.chef_server_url = "https://api.opscode.com/organizations/ORGNAME"
# chef.validation_key_path = "ORGNAME-validator.pem"
# end
#
# If you're using the Opscode platform, your validator client is
# ORGNAME-validator, replacing ORGNAME with your organization name.
#
# If you have your own Chef Server, the default validation client name is
# chef-validator, unless you changed the configuration.
#
# chef.validation_client_name = "ORGNAME-validator"
end
# Installing - Update rc.local
printf "echo \"noop\" > /sys/block/sda/queue/scheduler\n" > /etc/rc.local
printf "echo \"noop\" > /sys/block/sdb/queue/scheduler\n" >> /etc/rc.local
printf "/sbin/blockdev --setra 2048 /dev/sda\n" >> /etc/rc.local
printf "/sbin/blockdev --setra 2048 /dev/sdb\n" >> /etc/rc.local
printf "/sbin/swapon /dev/sdc\n" >> /etc/rc.local
printf "echo \"never\" > /sys/kernel/mm/transparent_hugepage/enabled\n" >> /etc/rc.local
printf "exit 0\n" >> /etc/rc.local
# Setup data disk
(echo o; echo n; echo p; echo 1; echo ; echo; echo w) | fdisk /dev/sdb
/sbin/mkfs.ext4 /dev/sdb1
cat "/dev/sdb1 /data ext4 defaults,noatime 0 0" >> /etc/fstab
/bin/mount -a
apt-get update
apt-get -y upgrade
apt-get -y -f install
apt-get -y install ntp
# apt-get -y install openssh-server openssh-client # Not required for Vagrant
rm -f /bin/sh && ln -s /bin/bash /bin/sh
# Format and enable swap
mkswap /dev/sdc
swapon /dev/sdc
# Run RC Local to enable all settings:
/etc/rc.local
# Get latest vertica image
echo "Downloading Vertica..."
wget --quiet https://s3.amazonaws.com/zahlen/vertica/vertica_latest_amd64.deb
# Installing...
dpkg -i vertica_latest_amd64.deb
# Setup Vertica
/opt/vertica/sbin/install_vertica --hosts 127.0.0.1
# Remaining instructions
echo "Run the following instructions:"
echo "-------------------------------"
echo "vagrant ssh"
echo "sudo su - dbadmin -c /opt/vertica/bin/admintools"
#!/bin/bash
# See https://my.vertica.com/docs/4.1/HTML/Master/15261.htm
vsql -U username -w passwd -h testdb01 -d vmart -At -c "SELECT * from store.store_sales_fact" \
| pv -lpetr -s `vsql -U username -w passwd -h testdb01 -d vmart -At -c "SELECT COUNT (*) FROM store.store_sales_fact;"` \
| vsql -U username -w passwd -d vmart -c "COPY store.store_sales_fact FROM STDIN DELIMITER '|';"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment