Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save yihyang/1163bf9443d5acee61c100cfdfeb568b to your computer and use it in GitHub Desktop.
Save yihyang/1163bf9443d5acee61c100cfdfeb568b to your computer and use it in GitHub Desktop.
Introduction to SQL for BigQuery and Cloud SQL
# Exploring the BigQuery Console
SELECT end_station_name FROM `bigquery-public-data.london_bicycles.cycle_hire`;
SELECT * FROM `bigquery-public-data.london_bicycles.cycle_hire` WHERE duration>=1200;
SELECT start_station_name FROM `bigquery-public-data.london_bicycles.cycle_hire` GROUP BY start_station_name;
# More SQL Keywords: GROUP BY, COUNT, AS, and ORDER BY
SELECT start_station_name, COUNT(*) FROM `bigquery-public-data.london_bicycles.cycle_hire` GROUP BY start_station_name;
SELECT start_station_name, COUNT(*) AS num_starts FROM `bigquery-public-data.london_bicycles.cycle_hire` GROUP BY start_station_name;
SELECT start_station_name, COUNT(*) AS num FROM `bigquery-public-data.london_bicycles.cycle_hire` GROUP BY start_station_name ORDER BY start_station_name;
SELECT start_station_name, COUNT(*) AS num FROM `bigquery-public-data.london_bicycles.cycle_hire` GROUP BY start_station_name ORDER BY num;
SELECT start_station_name, COUNT(*) AS num FROM `bigquery-public-data.london_bicycles.cycle_hire` GROUP BY start_station_name ORDER BY num DESC;
Working with Cloud SQL
SELECT start_station_name, COUNT(*) AS num FROM `bigquery-public-data.london_bicycles.cycle_hire` GROUP BY start_station_name ORDER BY num DESC;
SELECT end_station_name, COUNT(*) AS num FROM `bigquery-public-data.london_bicycles.cycle_hire` GROUP BY end_station_name ORDER BY num DESC;
New Queries in Cloud SQL
gcloud auth list
gcloud config list project
gcloud sql connect qwiklabs-demo --user=root
CREATE DATABASE bike;
USE bike;
CREATE TABLE london1 (start_station_name VARCHAR(255), num INT);
USE bike;
CREATE TABLE london2 (end_station_name VARCHAR(255), num INT);
SELECT * FROM london1;
SELECT * FROM london2;
DELETE FROM london1 WHERE num=0;
DELETE FROM london2 WHERE num=0;
INSERT INTO london1 (start_station_name, num) VALUES ("test destination", 1);
SELECT start_station_name AS top_stations, num FROM london1 WHERE num>100000
UNION
SELECT end_station_name, num FROM london2 WHERE num>100000
ORDER BY top_stations DESC;
Cloud SQL for MySQL: Qwik Start
gcloud sql connect myinstance --user=root
CREATE DATABASE guestbook;
USE guestbook;
CREATE TABLE entries (guestName VARCHAR(255), content VARCHAR(255),
entryID INT NOT NULL AUTO_INCREMENT, PRIMARY KEY(entryID));
INSERT INTO entries (guestName, content) values ("first guest", "I got here!");
INSERT INTO entries (guestName, content) values ("second guest", "Me too!");
SELECT * FROM entries;
Cloud SQL for PostgreSQL: Qwik Start
Connect to your instance using the psql client in the Cloud Shell
gcloud sql connect myinstance --user=postgres
CREATE TABLE guestbook (guestName VARCHAR(255), content VARCHAR(255),
entryID SERIAL PRIMARY KEY);
INSERT INTO guestbook (guestName, content) values ('first guest', 'I got here!');
INSERT INTO guestbook (guestName, content) values ('second guest', 'Me too!');
SELECT * FROM guestbook;
04 - Loading Data into Google Cloud SQL
Preparing your Environment
git clone \
https://github.com/GoogleCloudPlatform/data-science-on-gcp/
cd data-science-on-gcp/03_sqlstudio
export PROJECT_ID=$(gcloud info --format='value(config.project)')
export BUCKET=${PROJECT_ID}-ml
Create a Cloud SQL instance
gcloud sql instances create flights \
--tier=db-n1-standard-1 --activation-policy=ALWAYS
gcloud sql users set-password root --host % --instance flights \
--password Passw0rd
export ADDRESS=$(wget -qO - http://ipecho.net/plain)/32
gcloud sql instances patch flights --authorized-networks $ADDRESS
MYSQLIP=$(gcloud sql instances describe \
flights --format="value(ipAddresses.ipAddress)")
echo $MYSQLIP
mysql --host=$MYSQLIP --user=root \
--password --verbose < create_table.sql
mysql --host=$MYSQLIP --user=root --password
use bts;
describe flights;
Add data to Cloud SQL instance
counter=0
for FILE in 201501.csv 201502.csv; do
gsutil cp gs://$BUCKET/flights/raw/$FILE \
flights.csv-${counter}
counter=$((counter+1))
mysqlimport --local --host=$MYSQLIP --user=root --password \
--ignore-lines=1 --fields-terminated-by=',' bts flights.csv-*
Build the initial data model
use bts;
select DISTINCT(FL_DATE) from flights;
select DISTINCT(CARRIER) from flights;
select count(dest) from flights where arr_delay < 15 and dep_delay < 15;
select count(dest) from flights where arr_delay >= 15 and dep_delay < 15;
select count(dest) from flights where arr_delay < 15 and dep_delay >= 15;
select count(dest) from flights where arr_delay >= 15 and dep_delay >= 15;
SET @ARR_DELAY_THRESH = 15;
SET @DEP_DELAY_THRESH = 10;
# Correct - true negative
select count(dest) from flights where arr_delay < @ARR_DELAY_THRESH and dep_delay < @DEP_DELAY_THRESH;
# False negative
select count(dest) from flights where arr_delay >= @ARR_DELAY_THRESH and dep_delay < @DEP_DELAY_THRESH;
# False positive
select count(dest) from flights where arr_delay < @ARR_DELAY_THRESH and dep_delay >= @DEP_DELAY_THRESH;
# True positive
select count(dest) from flights where arr_delay >= @ARR_DELAY_THRESH and dep_delay >= @DEP_DELAY_THRESH;
SET @ARR_DELAY_THRESH = 15;
SET @DEP_DELAY_THRESH = 20;
# Correct - true negative
select count(dest) from flights where arr_delay < @ARR_DELAY_THRESH and dep_delay < @DEP_DELAY_THRESH;
# False negative
select count(dest) from flights where arr_delay >= @ARR_DELAY_THRESH and dep_delay < @DEP_DELAY_THRESH;
# False positive
select count(dest) from flights where arr_delay < @ARR_DELAY_THRESH and dep_delay >= @DEP_DELAY_THRESH;
# True positive
select count(dest) from flights where arr_delay >= @ARR_DELAY_THRESH and dep_delay >= @DEP_DELAY_THRESH;
Cloud SQL with Terraform
# Verifying the Terraform version
terraform version
# Download necessary files
mkdir sql-with-terraform
cd sql-with-terraform
gsutil cp -r gs://spls/gsp234/gsp234.zip .
mkdir sql-with-terraform
cd sql-with-terraform
gsutil cp -r gs://spls/gsp234/gsp234.zip .
unzip gsp234.zip
cat main.tf
# Run Terraform
terraform init
terraform plan -out=tfplan
terraform apply tfplan
# Installing the Cloud SQL Proxy
wget https://dl.google.com/cloudsql/cloud_sql_proxy.linux.amd64 -O cloud_sql_proxy
chmod +x cloud_sql_proxy
# Test connection to the database
export GOOGLE_PROJECT=$(gcloud config get-value project)
MYSQL_DB_NAME=$(terraform output -json | jq -r '.instance_name.value')
MYSQL_CONN_NAME="${GOOGLE_PROJECT}:us-central1:${MYSQL_DB_NAME}"
./cloud_sql_proxy -instances=${MYSQL_CONN_NAME}=tcp:3306
cd ~/sql-with-terraform
echo MYSQL_PASSWORD=$(terraform output -json | jq -r '.generated_user_password.value')
mysql -udefault -p --host 127.0.0.1 default
Using Ruby on Rails with Cloud SQL for PostgreSQL
# Create a PostgreSQL Cloud SQL instance
gcloud sql instances create postgres-instance \
--database-version POSTGRES_9_6 \
--tier db-g1-small
gcloud sql users set-password postgres --host=% \
--instance postgres-instance \
--password [PASSWORD]
# Set up the Cloud SQL Proxy
wget https://dl.google.com/cloudsql/cloud_sql_proxy.linux.amd64
mv cloud_sql_proxy.linux.amd64 cloud_sql_proxy
chmod +x cloud_sql_proxy
sudo mkdir /cloudsql
sudo chmod 0777 /cloudsql
gcloud sql instances describe postgres-instance | grep connectionName
./cloud_sql_proxy -dir=/cloudsql \
-instances="[YOUR_INSTANCE_CONNECTION_NAME]"
# Install Ruby on Rails
gem install rails
rails --version
# Test the Generated Rails Application
bundle exec rails server --port 8080
# Set up Rails app with Cloud SQL for PostgreSQL
bundle add pg
bundle install
production:
adapter: postgresql
pool: 5
timeout: 5000
username: postgres
password: [PASSWORD]
database: postgres-database
host: /cloudsql/[YOUR_INSTANCE_CONNECTION_NAME]
# Generate a model
bundle exec rails generate model Cat name:string age:decimal
RAILS_ENV=production bundle exec rails db:create
RAILS_ENV=production bundle exec rails db:migrate
# Add entries
RAILS_ENV=production bundle exec rails console
Cat.create name: "Mr. Whiskers", age: 4
Cat.create name: "Ms. Paws", age: 2
exit
# List the different cats
bundle exec rails generate controller CatFriends index
Rails.application.routes.draw do
get 'cat_friends/index'
# For details on the DSL available within this file, see http://guides.rubyonrails.org/routing.html
root 'cat_friends#index'
end
# Display database entries
class CatFriendsController < ApplicationController
def index
@cats = Cat.all
end
end
<h1>A list of my Cats</h1>
<% @cats.each do |cat| %>
<%=cat.name%> is <%=cat.age%> years old!<br />
<% end %>
bundle exec rails secret
export SECRET_KEY_BASE=[SECRET_KEY]
RAILS_ENV=production bundle exec rails assets:precompile
RAILS_ENV=production bundle exec rails server --port 8080
# Deployment Configuration
cd app_name
entrypoint: bundle exec rackup --port $PORT
env: flex
runtime: ruby
env_variables:
SECRET_KEY_BASE: [SECRET KEY]
beta_settings:
cloud_sql_instances: [YOUR_INSTANCE_CONNECTION_NAME]
# Deploying the Application on App Engine
gcloud app create
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment