Skip to content

Instantly share code, notes, and snippets.

@HarlanH
HarlanH / run_dbt.sh
Created September 6, 2018 20:08
dbt full-refresh flag
#!/usr/bin/env bash
# When run, first tries to figure out if the full-refresh flag is set.
# Then, runs dbt with or without the flag.
# Do not run me in dev mode!
full_refresh_needed="$(
psql $REDSHIFT_URI -t <<EOF
SELECT full_refresh
@HarlanH
HarlanH / dbt-Metrics.Rmd
Created November 30, 2017 18:41
R Flexdashboard for dbt production metrics
---
title: "dbt Metrics"
output:
flexdashboard::flex_dashboard:
orientation: rows
---
<!--
If dbt is being run on an AWS server with logs being pulled into Cloudwatch,
@HarlanH
HarlanH / dbt_doc.py
Created August 3, 2017 20:57
extracts structured comments/documentation from a dbt directory tree
#!/usr/bin/env python
# This Q&D script scans through SQL files in the models directory and outputs a Markdown document
# with per-model comments.
import os
from os import path
import re
import warnings
import time
library(dplyr)
library(RPostgreSQL)
library(httr)
library(Lahman)
library(ggplot2)
# connect to the db
con <- src_postgres(dbname="harlan", host="localhost", user="harlan")
# upload the Batting db
@HarlanH
HarlanH / randy_venn.jl
Created August 31, 2014 00:12
VennEuler code for 3 statistical programming languages, per Randy Zwitch
using VennEuler
# 1 364 23.30% SAS only
# 10 350 22.41% Python only
# 11 70 4.48% Python and SAS
# 100 490 31.37% R only
# 101 68 4.35% R and SAS
# 110 200 12.80% R and Python
# 111 20 1.28% R, Python and SAS
@HarlanH
HarlanH / polygontest.R
Created February 27, 2014 03:08
rMaps/leaflet example that doesn't work (see Javascript console for weird error)
library(plyr)
library(rMaps)
venues <- structure(list(name = c("pivotal", "aol", "columbia"), lat = c(40.7403372,
40.7308948, 40.8074358), lon = c(-73.9951462, -73.9917096, -73.9625858 )),
.Names = c("name", "lat", "lon"), row.names = c(NA, -3L), class = "data.frame")
times_square <- c(40.7577, -73.9857)
map <- Leaflet$new()
@HarlanH
HarlanH / test1.js
Created August 9, 2013 14:29
node server for koding
// Minimal Node HTTP server: every request gets the same plain-text greeting.
var http = require('http');

// Request handler: reply 200 with a fixed text body, regardless of the request.
function handleRequest(req, res) {
  res.writeHead(200, {'Content-Type': 'text/plain'});
  res.end('Hello Koding');
}

var server = http.createServer(handleRequest);

// Listen on port 1337 on all interfaces (0.0.0.0).
server.listen(1337, '0.0.0.0');

console.log('Server running at http://host:1337/');
@HarlanH
HarlanH / vect_set.jl
Created December 27, 2012 18:37
draft intersect and union functions for Julia vectors
# To do set operations on vectors, turn the b vector into a set, then
# iterate over the a vector, tracking already-seen elements in a second set.
# This is moderately efficient, preserves order, and removes dupes.
function intersect{T}(a::Vector{T}, b::Vector{T})
bset = Set(b...)
aset = Set()
ret = T[]
for a_elem in a
if has(bset, a_elem) && !has(aset, a_elem)
push(ret, a_elem)
@HarlanH
HarlanH / IndexDict.jl
Created August 17, 2012 21:47
IndexDict.jl
# IndexDict: an Associative (dictionary-like) type with ByteString keys and
# values of type V. It holds an Index and a Vector{V}.
# NOTE(review): Index is not defined in this snippet; presumably it is the
# name-to-position Index type from DataFrames of the same era -- confirm.
type IndexDict{V} <: Associative{ByteString,V}
# key bookkeeping; presumably maps each key to a position in arr -- TODO confirm
idx::Index
# the stored values, with element type V
arr::Vector{V}
# inner constructor: start with an empty Index and a zero-length array of V
IndexDict() = new(Index(), Array(V,0))
end
# convenience outer constructor: with no type parameter given, values are Any
IndexDict() = IndexDict{Any}()
# assignment by a string replaces or appends
function assign(id::IndexDict, v, key::ByteString)
@HarlanH
HarlanH / vswitch.R
Created June 6, 2012 14:28
vswitch
vswitch <- function(namedList, default=NA, selector) {
# Function adapted from Bill Dunlap that implements something along the lines of a vectorized switch statement.
# http://tolstoy.newcastle.edu.au/R/e8/devel/09/12/1122.html
#
# Args:
# namedList - e.g., list(times=df$a * df$b, plus=df$a + df$b)
# default - a value to assign to elements of selector that aren't matched in namedList
# selector - e.g., c('times', 'times', 'plus', 'exp', 'plus')
#
# Returns: a vector of values selected from namedList