Brian Abelson abelsonlive

## dbt_backfill.py
#!/usr/bin/env python
import json
import copy
import sys
import argparse
from datetime import datetime, timedelta
from subprocess import Popen, PIPE


# dictionary of partition names to variable-generating functions

## config.yml
aws_access_key_id:
aws_secret_access_key:
rs_user:
rs_server:
rs_db:
rs_port:
rs_password:

## gist:3703504
install.packages("foreign")
library("foreign")
data = read.dbf("path/to/file.dbf")
write.csv(data, "path/to/file.csv", row.names=F)

## base_models.yml
profile_dir: '~/.dbt'                    # the location of dbt configurations
target: prod                             # the dbt profile to use for connection
source_schema: fivetran_app              # the tables' source schema (where fivetran imports your data to)
materialized: view                       # how to materialize these views
target_schema: app                       # the schema to materialize the base models under (where users will access the data from)
table_prefix: app                        # a prefix to prepend to each base model/view
directory: models/base/                  # the directory under which to save the base models
empty_as_null: true                      # whether or not to apply EMPTYASNULL logic to all text fields
incl_fivetran_deleted: false             # whether or not to include records marked as "deleted" by fivetran
excl_fivetran_synced: true               # whether or not to exclude the time fivetran synced each record

## lda.R
# Brian Abelson @brianabelson
# Harmony Institute
# December 5, 2012

# lda is a wrapper for lda.collapsed.gibbs.sampler in the "lda" package
# it fits topic models using latent dirichlet allocation
# it provides arguments for cleaning the input text and tuning the parameters of the model
# it also returns a lot of useful information about the topics/documents in a format that you can easily join back to your original data
# this allows you to easily model outcomes based on the distribution of topics within a collection of texts

## cbind.fill.R
# yay!
# cbind.fill: column-bind objects that have differing numbers of rows.
#
# Every argument is coerced with as.matrix(); inputs with fewer rows than the
# tallest one are padded at the bottom with NA rows before being bound.
#
# Args:
#   ...: objects accepted by as.matrix() (vectors, matrices, data frames).
# Returns:
#   A matrix with as many rows as the tallest input and all inputs' columns
#   side by side, or NULL when called with no arguments (as cbind() does).
cbind.fill <- function(...) {
    nm <- lapply(list(...), as.matrix)
    # With no inputs there is nothing for max() to reduce over; return early.
    if (length(nm) == 0) {
        return(NULL)
    }
    # vapply guarantees an integer vector of row counts
    n <- max(vapply(nm, nrow, integer(1)))
    pad <- function(x) {
        # matrix() without data is filled with NA; it has zero rows when x
        # is already at full height, so rbind leaves x unchanged
        rbind(x, matrix(, n - nrow(x), ncol(x)))
    }
    do.call(cbind, lapply(nm, pad))
}

## keybase.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                abelsonlive
                / keybase.md
            
            
              Created
              July 26, 2021 18:14
            
          
    Keybase proof

I hereby claim:

I am abelsonlive on github.
I am brianabelson (https://keybase.io/brianabelson) on keybase.
I have a public key ASB8plf3RA_IBu07tMbiBLNUcqZ1lS-uzC8TtsNL49eFfwo

To claim this, I am signing this object:

  
## srapeshell.R
# best practices for web scraping in R #

# function should be used with ldply
# eg:

ldply(urls, scrape)

# add a try to ignore broken links/ unresponsive pages
# eg:

## README.md

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                abelsonlive
                / README.md
            
            
              Last active
              September 9, 2019 16:43
            
              
                A simple two player game of tic tac toe in the terminal.
              
          
    tic tac toe

A simple two player game of Tic Tac Toe written in Python 3.6+
To play, download the file below, open a terminal, and run the following command:
$ python3 tic-tac-toe.py

  
## detect_superlatives.py
import sys
import json

import nltk
from nltk.tokenize import RegexpTokenizer

# For a list of all POS tags and their definitions, see:
# https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
# Penn Treebank tags treated as superlatives:
#   JJS - adjective, superlative
#   RBS - adverb, superlative
SUPERLATIVE_TAGS = {"JJS", "RBS"}
	#!/usr/bin/env python
	import json
	import copy
	import sys
	import argparse
	from datetime import datetime, timedelta
	from subprocess import Popen, PIPE


	# dictionary of partition names to variable-generating functions
	aws_access_key_id:
	aws_secret_access_key:
	rs_user:
	rs_server:
	rs_db:
	rs_port:
	rs_password:
	install.packages("foreign")
	library("foreign")
	data = read.dbf("path/to/file.dbf")
	write.csv(data, "path/to/file.csv", row.names=F)
	profile_dir: '~/.dbt' # the location of dbt configurations
	target: prod # the dbt profile to use for connection
	source_schema: fivetran_app # the tables' source schema (where fivetran imports your data to)
	materialized: view # how to materialize these views
	target_schema: app # the schema to materialize the base models under (where users will access the data from)
	table_prefix: app # a prefix to prepend to each base model/view
	directory: models/base/ # the directory under which to save the base models
	empty_as_null: true # whether or not to apply EMPTYASNULL logic to all text fields
	incl_fivetran_deleted: false # whether or not to include records marked as "deleted" by fivetran
	excl_fivetran_synced: true # whether or not to exclude the time fivetran synced each record
	# Brian Abelson @brianabelson
	# Harmony Institute
	# December 5, 2012

	# lda is a wrapper for lda.collapsed.gibbs.sampler in the "lda" package
	# it fits topic models using latent dirichlet allocation
	# it provides arguments for cleaning the input text and tuning the parameters of the model
	# it also returns a lot of useful information about the topics/documents in a format that you can easily join back to your original data
	# this allows you to easily model outcomes based on the distribution of topics within a collection of texts
	# yay!
	# cbind.fill: column-bind objects that have differing numbers of rows.
	#
	# Every argument is coerced with as.matrix(); inputs with fewer rows than
	# the tallest one are padded at the bottom with NA rows before being bound.
	#
	# Args:
	#   ...: objects accepted by as.matrix() (vectors, matrices, data frames).
	# Returns:
	#   A matrix with as many rows as the tallest input and all inputs' columns
	#   side by side, or NULL when called with no arguments (as cbind() does).
	cbind.fill <- function(...) {
		nm <- lapply(list(...), as.matrix)
		# With no inputs there is nothing for max() to reduce over; return early.
		if (length(nm) == 0) {
			return(NULL)
		}
		# vapply guarantees an integer vector of row counts
		n <- max(vapply(nm, nrow, integer(1)))
		pad <- function(x) {
			# matrix() without data is filled with NA; it has zero rows when x
			# is already at full height, so rbind leaves x unchanged
			rbind(x, matrix(, n - nrow(x), ncol(x)))
		}
		do.call(cbind, lapply(nm, pad))
	}
	# best practices for web scraping in R #

	# function should be used with ldply
	# eg:

	ldply(urls, scrape)

	# add a try to ignore broken links/ unresponsive pages
	# eg:
	import sys
	import json

	import nltk
	from nltk.tokenize import RegexpTokenizer

	# For a list of all POS tags and their definitions, see:
	# https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
	# Penn Treebank tags treated as superlatives:
	#   JJS - adjective, superlative
	#   RBS - adverb, superlative
	SUPERLATIVE_TAGS = {"JJS", "RBS"}