Fred Benenson fredbenenson

## generate_unicode.rb
#!/usr/bin/env ruby
# encoding: utf-8

require "unicode/name"
require "csv"
require "slugify"

(4884..4886).each do |char|
  begin

## app.R
# install.packages("mongolite")
# Attach dependencies with library() so a missing package stops the script
# immediately; require() only returns FALSE with a warning and carries on.
library(mongolite)
library(tidyverse)
library(lubridate)
library(DT)

# Handle to the "tweets" collection of the "tweets" database at mongodb://localhost.
connection <- mongo(collection = "tweets", db = "tweets", url = "mongodb://localhost")

query <- '{
  "$and": [

## kickstarter_sql_style_guide.md

      
              1 file
            
          
              89 forks
            
          
              14 comments
            
          
              410 stars
            
          
                fredbenenson
                / kickstarter_sql_style_guide.md
            
            
              Last active
              June 24, 2024 03:28
            
              
                Kickstarter SQL Style Guide
              
          
  layout
  title
  description
  tags
  
  
  default
  SQL Style Guide
  A guide to writing clean, clear, and consistent SQL.
  
  
  data
  process
  
  
Purpose


## metaprogramming_dplyr.r
# Add a `<dimension>_average` column holding the mean of every column named in
# the character vector `dimensions`.
# across() applies mean() to each selected column directly, so no mutate() code
# has to be assembled as text and parsed; all_of() tells dplyr that `dimensions`
# is an external vector of column names, and `.names` sets the output names.
dataframe %>%
  mutate(across(all_of(dimensions), mean, .names = "{.col}_average"))

## data_frame_indexing_bug.r
# This is a reduction which seems to indicate an issue when
# adding a column using an arbitrary set of indexes.

# First, let's create a data-frame with some random values:
s <- data.frame(x = runif(10), y = runif(10))

# Now, two randomly generated lists of numbers that we'll use to try to index
# This could be created thusly:
# wrong <- sample(1:nrow(s), nrow(s) * 0.8), etc.
wrong <- c(3, 6, 7, 5, 1, 2, 9, 8)

## vim-mode-error.sh
`apm install vim-mode`

/Applications/Atom.app/Contents/Resources/app/apm/node_modules/atom-package-manager/node_modules/keytar/node_modules/bindings/bindings.js:83
        throw e
              ^
Error: Module version mismatch, refusing to load.
    at Object.Module._extensions..node (module.js:485:11)
    at Module.load (module.js:356:32)
    at Function.Module._load (module.js:312:12)
    at Module.require (module.js:362:17)

## seconds_since_midnight.sql
-- Seconds elapsed between the start of the current date and now.
--
-- Note that this doesn't work:
-- SELECT DATEDIFF(second, DATE(NOW()), NOW());
-- =>
-- ERROR:  function pg_catalog.date_diff("unknown", date, timestamp with time zone) does not exist
-- HINT:  No function matches the given name and argument types. You may need to add explicit type casts.

-- This does work: SPLIT_PART(NOW(), '.', 1) keeps only the text before the
-- first '.', and the explicit ::timestamp cast replaces the
-- "timestamp with time zone" argument that the error above complains about.
SELECT DATEDIFF(second, DATE(NOW()), SPLIT_PART(NOW(), '.', 1)::timestamp);


## example_redshift_query.sql
SELECT *
FROM
  (SELECT
    month,
    amount,
    pledge_count,
    SUM(1) OVER(PARTITION BY month ORDER BY pledge_count DESC ROWS UNBOUNDED PRECEDING) as row
  FROM
    (SELECT
      TO_CHAR(CONVERT_TIMEZONE('UTC', 'America/New_York', backings.pledged_at), 'YYYY-MM-01') as month,

## redshift_credentials.r
# Install the Redshift R library:
# https://github.com/pingles/redshift-r
# install.packages("~/Downloads/redshift-r-master", dependencies = TRUE, repos = NULL, type = "source")

library(redshift)

# Open a connection from a JDBC URL, a login and a password.
# NOTE(review): REDSHIFT_DB, DB_NAME, LOGIN and PASSWORD look like placeholders
# to be filled in by the reader -- avoid committing real credentials here.
redshift <- redshift.connect("jdbc:postgresql://REDSHIFT_DB:5439/DB_NAME", "LOGIN", "PASSWORD")

# Example Query:
# Runs the SQL over the connection opened above and stores the result in `data`.
data <- dbGetQuery(redshift, "SELECT COUNT(*) FROM table")

## citi_bike_share.r
library(ggplot2)
library(rjson)

# Strip out the enclosing object so it's just an array of stations before importing into R.
# e.g. data should be of the form:
# [
#  {"id":72,"stationName":"W 52 St & 11 Av","availableDocks":14,"totalDocks":39,"latitude":40.76727216,"longitude":-73.99392888,"statusValue":"In Service","statusKey":1,"availableBikes":21,"stAddress1":"W 52 St & 11 Av","stAddress2":"","city":"","postalCode":"","location":"","altitude":"","testStation":false,"lastCommunicationTime":null,"landMark":""},
#  ...
# ]
#
	#!/usr/bin/env ruby
	# encoding: utf-8

	require "unicode/name"
	require "csv"
	require "slugify"

	(4884..4886).each do \|char\|
	begin
	# install.packages("mongolite")
	# Attach dependencies with library() so a missing package stops the script
	# immediately; require() only returns FALSE with a warning and carries on.
	library(mongolite)
	library(tidyverse)
	library(lubridate)
	library(DT)

	# Handle to the "tweets" collection of the "tweets" database at mongodb://localhost.
	connection <- mongo(collection = "tweets", db = "tweets", url = "mongodb://localhost")

	query <- '{
	"$and": [
	# Add a `<dimension>_average` column holding the mean of every column named in
	# the character vector `dimensions`.
	# across() applies mean() to each selected column directly, so no mutate() code
	# has to be assembled as text and parsed; all_of() tells dplyr that `dimensions`
	# is an external vector of column names, and `.names` sets the output names.
	dataframe %>%
	  mutate(across(all_of(dimensions), mean, .names = "{.col}_average"))
	# This is a reduction which seems to indicate an issue when
	# adding a column using an arbitrary set of indexes.

	# First, let's create a data-frame with some random values:
	s <- data.frame(x = runif(10), y = runif(10))

	# Now, two randomly generated lists of numbers that we'll use to try to index
	# This could be created thusly:
	# wrong <- sample(1:nrow(s), nrow(s) * 0.8), etc.
	wrong <- c(3, 6, 7, 5, 1, 2, 9, 8)
	`apm install vim-mode`

	/Applications/Atom.app/Contents/Resources/app/apm/node_modules/atom-package-manager/node_modules/keytar/node_modules/bindings/bindings.js:83
	throw e
	^
	Error: Module version mismatch, refusing to load.
	at Object.Module._extensions..node (module.js:485:11)
	at Module.load (module.js:356:32)
	at Function.Module._load (module.js:312:12)
	at Module.require (module.js:362:17)
	-- Seconds elapsed between the start of the current date and now.
	--
	-- Note that this doesn't work:
	-- SELECT DATEDIFF(second, DATE(NOW()), NOW());
	-- =>
	-- ERROR: function pg_catalog.date_diff("unknown", date, timestamp with time zone) does not exist
	-- HINT: No function matches the given name and argument types. You may need to add explicit type casts.

	-- This does work: SPLIT_PART(NOW(), '.', 1) keeps only the text before the
	-- first '.', and the explicit ::timestamp cast replaces the
	-- "timestamp with time zone" argument that the error above complains about.
	SELECT DATEDIFF(second, DATE(NOW()), SPLIT_PART(NOW(), '.', 1)::timestamp);
	SELECT *
	FROM
	(SELECT
	month,
	amount,
	pledge_count,
	SUM(1) OVER(PARTITION BY month ORDER BY pledge_count DESC ROWS UNBOUNDED PRECEDING) as row
	FROM
	(SELECT
	TO_CHAR(CONVERT_TIMEZONE('UTC', 'America/New_York', backings.pledged_at), 'YYYY-MM-01') as month,
	# Install the Redshift R library:
	# https://github.com/pingles/redshift-r
	# install.packages("~/Downloads/redshift-r-master", dependencies = TRUE, repos = NULL, type = "source")

	library(redshift)

	# Open a connection from a JDBC URL, a login and a password.
	# NOTE(review): REDSHIFT_DB, DB_NAME, LOGIN and PASSWORD look like placeholders
	# to be filled in by the reader -- avoid committing real credentials here.
	redshift <- redshift.connect("jdbc:postgresql://REDSHIFT_DB:5439/DB_NAME", "LOGIN", "PASSWORD")

	# Example Query:
	# Runs the SQL over the connection opened above and stores the result in `data`.
	data <- dbGetQuery(redshift, "SELECT COUNT(*) FROM table")
	library(ggplot2)
	library(rjson)

	# Strip out the enclosing object so it's just an array of stations before importing into R.
	# e.g. data should be of the form:
	# [
	# {"id":72,"stationName":"W 52 St & 11 Av","availableDocks":14,"totalDocks":39,"latitude":40.76727216,"longitude":-73.99392888,"statusValue":"In Service","statusKey":1,"availableBikes":21,"stAddress1":"W 52 St & 11 Av","stAddress2":"","city":"","postalCode":"","location":"","altitude":"","testStation":false,"lastCommunicationTime":null,"landMark":""},
	# ...
	# ]
	#