Matt Harris mrecos

## statebins test.r

      
              2 files
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mrecos
                / statebins test.r
            
            
              Last active
              August 29, 2015 14:15
            
              
                Qucik setup to produce statebins choropleth-like map
              
          
    !image

  
## parking_violation.R
# I started to play with the data and used this to get it a bit cleaned up and prepared for analysis
# I am sure my code is not the most efficient or prettiest, but if there are errors, please let me know
# Perhaps you will find this usefull in saving a few minutes getting the data ready for the fun part; analysis!
# DATA : https://www.opendataphilly.org/dataset/parking-violation

library(data.table)
library(lubridate)
setwd(<<YOUR DIRECTORY>>) # Set WD if desired
data <- fread("Parking_Violations.csv")
dat <- data.frame(data) # convert to a data.frame from data table

## Temp_graphic.Rd
# Here is my attempt to use ggplot2 to recreate Randy Olson's (@randal_olson), recreation of the famous Tufte New York City Weather chart.
# The original Tufte chart is here:
# http://www.edwardtufte.com/bboard/q-and-a-fetch-msg?msg_id=00014g
# Randy Olson recreated this chart for a post on 538 (http://fivethirtyeight.com/) linked here:
# http://fivethirtyeight.com/features/what-12-months-of-record-setting-temperatures-looks-like-across-the-u-s/?ex_cid=538twitter
# and Randy posted his data and Python code here:
# https://github.com/mrecos/data/tree/master/us-weather-history
# This is a ggplot implementation of Randy's basic design, but without the top and right side axis and legend
# I think the legend in Randy's plot may be added in Illustrator of something
# The only real hard-code (aside from .csv location) here is the y-axis labels that show the data is from July '14 to June '15,

## density_plot.r
# The original plot in question appears to be the results of a model
# based on scores and age, a distribution is predicted for score on
# each age class.  The range of predicted scores is visualized as a
# normal'ish density over each age class.  There is also a point which is the
# mean observation of the raw score at each age class.  This does not need to
# be at the mean of the distribution depending on fit of the model.
# There is also a line that is the predicted response of model over age classes.
#
# I did not have the data or model from this example, so I improvised.
# I first created sythetic model data based on the example, and from the

## loader.r
load.fun <- function(x) {
  x <- as.character(x)
  if(isTRUE(x %in% .packages(all.available=TRUE))) {
    eval(parse(text=paste("require(", x, ")", sep="")))
	print(paste(c(x, " : already installed; requiring"), collapse=''))
  } else {
	if(x == "twitteR"){
		print(paste(c(x, " : not installed; installing"), collapse=''))
		install_github("twitteR", username="geoffjentry")
		print(paste(c(x, " : installed and requiring"), collapse=''))

## Multi point drive times and compare.r
# Driving distance from multuple starts to multilpe destinations
# modified from the code of gmapsdistance package  by Rodrigo Azuero Melo
# https://cran.r-project.org/web/packages/gmapsdistance

gdist2 <- function (origin, destination, mode, key)
{
  typeKEY = class(key)
  if (typeKEY != "character") {
    stop("Key should be string")
  }

## Test_vs_Train_error.R
# function to generate samples of data
get_data <- function(n, exp_i, intercept, beta1, sigma, plot = TRUE){
  library(lattice)
  x <- sample(seq(1,100,0.1), n, replace = TRUE)
  gen_model <- intercept + beta1 * x^(exp_i)
  y <- rnorm(n, gen_model, sigma)
  dat <- data.frame(x, y)
  if(isTRUE(plot)){
    print(xyplot(y~x, type=c("smooth", "p"),col.line = "darkorange", lwd = 2))
  }

## Zubrow Pop Change ggplot2.r
library("ggplot2") # Must be dev version, use: devtools::install_github("hadley/ggplot2")
library("gridExtra")
library("extrafont") # Need to follow steps here: http://zevross.com/blog/2014/07/30/tired-of-using-helvetica-in-your-r-graphics-heres-how-to-use-the-fonts-you-like-2/

# create data frame
year <- c(1760, 1790, 1797, 1850, 1860, 1889, 1900, 1910, 1950)
sites <- c("Isleta", "Acoma", "Laguna", "Zuni", "Sandia", "San Felipe",
           "Santa Ana", "Zia", "Santo Domingo", "Jemez", "Cochiti",
           "Tesuque", "Nambe", "San Ildefonso", "Pojoaque", "Santa Clara",
           "San Juan", "Picuris", "Toas")

## Zubrow cowplot ggplot viz.R
library("ggplot2") # Must use Dev version as of 03/18/16
library("gridExtra")
library("extrafont") # for font selection
library("dplyr") # for data preperation
library("cowplot") # for combining plots

# Prepare data for plotting
# data from Zubrow, E.B.W. (1974), Population, Contact,and Climate in the New Mexican Pueblos
# prepared as a long format to facilitate plotting
year <- c(1760, 1790, 1797, 1850, 1860, 1889, 1900, 1910, 1950)

## DF string split and concatenate.r
lut_match <- function(LUT, code_field, desc_col = 2, is_NA = "NA", no_code = "N/A"){
require(stringr)
require(splitstackshape)
  # this loops through all unique codes in code_field.  Potentially slow
  code_field[which(code_field == "")] <- NA
  unique_code <- unique(code_field)
  coded <- code_field
  for(i in seq_along(unique_code)){
    if(is.na(unique_code[i])){
      coded[is.na(coded)] <- is_NA # set this to whatever works
	# I started to play with the data and used this to get it a bit cleaned up and prepared for analysis
	# I am sure my code is not the most efficient or prettiest, but if there are errors, please let me know
	# Perhaps you will find this usefull in saving a few minutes getting the data ready for the fun part; analysis!
	# DATA : https://www.opendataphilly.org/dataset/parking-violation

	library(data.table)
	library(lubridate)
	setwd(<<YOUR DIRECTORY>>) # Set WD if desired
	data <- fread("Parking_Violations.csv")
	dat <- data.frame(data) # convert to a data.frame from data table
	# Here is my attempt to use ggplot2 to recreate Randy Olson's (@randal_olson), recreation of the famous Tufte New York City Weather chart.
	# The original Tufte chart is here:
	# http://www.edwardtufte.com/bboard/q-and-a-fetch-msg?msg_id=00014g
	# Randy Olson recreated this chart for a post on 538 (http://fivethirtyeight.com/) linked here:
	# http://fivethirtyeight.com/features/what-12-months-of-record-setting-temperatures-looks-like-across-the-u-s/?ex_cid=538twitter
	# and Randy posted his data and Python code here:
	# https://github.com/mrecos/data/tree/master/us-weather-history
	# This is a ggplot implementation of Randy's basic design, but without the top and right side axis and legend
	# I think the legend in Randy's plot may be added in Illustrator of something
	# The only real hard-code (aside from .csv location) here is the y-axis labels that show the data is from July '14 to June '15,
	# The original plot in question appears to be the results of a model
	# based on scores and age, a distribution is predicted for score on
	# each age class. The range of predicted scores is visualized as a
	# normal'ish density over each age class. There is also a point which is the
	# mean observation of the raw score at each age class. This does not need to
	# be at the mean of the distribution depending on fit of the model.
	# There is also a line that is the predicted response of model over age classes.
	#
	# I did not have the data or model from this example, so I improvised.
	# I first created sythetic model data based on the example, and from the
	load.fun <- function(x) {
	x <- as.character(x)
	if(isTRUE(x %in% .packages(all.available=TRUE))) {
	eval(parse(text=paste("require(", x, ")", sep="")))
	print(paste(c(x, " : already installed; requiring"), collapse=''))
	} else {
	if(x == "twitteR"){
	print(paste(c(x, " : not installed; installing"), collapse=''))
	install_github("twitteR", username="geoffjentry")
	print(paste(c(x, " : installed and requiring"), collapse=''))
	# Driving distance from multuple starts to multilpe destinations
	# modified from the code of gmapsdistance package by Rodrigo Azuero Melo
	# https://cran.r-project.org/web/packages/gmapsdistance

	gdist2 <- function (origin, destination, mode, key)
	{
	typeKEY = class(key)
	if (typeKEY != "character") {
	stop("Key should be string")
	}
	# function to generate samples of data
	get_data <- function(n, exp_i, intercept, beta1, sigma, plot = TRUE){
	library(lattice)
	x <- sample(seq(1,100,0.1), n, replace = TRUE)
	gen_model <- intercept + beta1 * x^(exp_i)
	y <- rnorm(n, gen_model, sigma)
	dat <- data.frame(x, y)
	if(isTRUE(plot)){
	print(xyplot(y~x, type=c("smooth", "p"),col.line = "darkorange", lwd = 2))
	}
	library("ggplot2") # Must be dev version, use: devtools::install_github("hadley/ggplot2")
	library("gridExtra")
	library("extrafont") # Need to follow steps here: http://zevross.com/blog/2014/07/30/tired-of-using-helvetica-in-your-r-graphics-heres-how-to-use-the-fonts-you-like-2/

	# create data frame
	year <- c(1760, 1790, 1797, 1850, 1860, 1889, 1900, 1910, 1950)
	sites <- c("Isleta", "Acoma", "Laguna", "Zuni", "Sandia", "San Felipe",
	"Santa Ana", "Zia", "Santo Domingo", "Jemez", "Cochiti",
	"Tesuque", "Nambe", "San Ildefonso", "Pojoaque", "Santa Clara",
	"San Juan", "Picuris", "Toas")
	library("ggplot2") # Must use Dev version as of 03/18/16
	library("gridExtra")
	library("extrafont") # for font selection
	library("dplyr") # for data preperation
	library("cowplot") # for combining plots

	# Prepare data for plotting
	# data from Zubrow, E.B.W. (1974), Population, Contact,and Climate in the New Mexican Pueblos
	# prepared as a long format to facilitate plotting
	year <- c(1760, 1790, 1797, 1850, 1860, 1889, 1900, 1910, 1950)
	lut_match <- function(LUT, code_field, desc_col = 2, is_NA = "NA", no_code = "N/A"){
	require(stringr)
	require(splitstackshape)
	# this loops through all unique codes in code_field. Potentially slow
	code_field[which(code_field == "")] <- NA
	unique_code <- unique(code_field)
	coded <- code_field
	for(i in seq_along(unique_code)){
	if(is.na(unique_code[i])){
	coded[is.na(coded)] <- is_NA # set this to whatever works