Martin Monkman MonkmanMH

## gist:9190970

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              3 stars
            
          
                MonkmanMH
                / gist:9190970
            
            
              Last active
              December 15, 2023 03:19
            
              
                Categorical data analysis in R - a resource list
              
          
    Categorical data analysis in R - a resource list

R packages

vcd & vcdExtra
http://cran.r-project.org/web/packages/vcd/
http://cran.r-project.org/web/packages/vcdExtra/

  
## gist:8798762
# CALCULATING PERCENTILES IN R
#
# a basic percentile function using "ecdf" [Empirical Cumulative Distribution Function]
# using a data file "percentiledata" with variable VALUE
#
# ecdf() returns a function; calling that function on a value gives the
# proportion of observations less than or equal to that value
percentileFUN <- ecdf(percentiledata$VALUE)
# print the ECDF object
percentileFUN
# percentile (as a proportion) of every observation in VALUE
percentileFUN(percentiledata$VALUE)
# write the percentile values to the source file as a new column "pctl"
# (this must call percentileFUN, the function created above)
percentiledata$pctl <- percentileFUN(percentiledata$VALUE)
#

## gist:7740998

      
              1 file
            
          
              4 forks
            
          
              1 comment
            
          
              3 stars
            
          
                MonkmanMH
                / gist:7740998
            
            
              Last active
              September 6, 2020 02:22
            
              
                Random number generation in R (rstats, #rstats)
              
          
    Random numbers in R

The creation of random numbers, or the random selection of elements in a set (or population), is an important part of statistics and data science.  From simulating coin tosses to selecting potential respondents for a survey, we have a heavy reliance on random number generation.
R offers us a variety of solutions for random number generation; here's a quick overview of some of the options.
runif, rbinom, rnorm

One simple solution is to use the runif function, which generates a stated number of values between two end points (but not the end points themselves!). The function uses the continuous uniform distribution, meaning that every value between the two end points has an equal probability of being sampled.

  
## gist:6891654
# BINOMIAL CONFIDENCE INTERVAL CALCULATOR
#
# the binomial distribution is approximated by the Normal distribution (for large n)
# http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
#
# read the data file
bin_data <- read.csv("bin_data.csv")
#
#
# the binomial confidence calculator function

## gist:6861482
# PRINT THE INTEGERS 1 THROUGH 10
#
# VERSION 1 -- using while()
# make the initial assignment of variable count_1 to 0 (required: the while() condition reads count_1 before the first pass)
count_1 <- 0
# the while loop - conditional statement in the first parenthesis,
# then the repeated steps within the {}
while (count_1 < 10)
{  count_1 <- count_1 + 1
   print(count_1)

## gist:6486001

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                MonkmanMH
                / gist:6486001
            
            
              Last active
              December 22, 2015 14:29
                — forked from josecarlosgonz/GoogleMapsAndR.md
            
          
    Using Google Maps API and R

[source: http://www.r-bloggers.com/using-google-maps-api-and-r/]
[address modifications added by MonkmanMH]
This script uses RCurl and RJSONIO to download data from Google's API to get the latitude, longitude, location type, and formatted address
library(RCurl)


## HarbourCats_attendance_2013
num,date,day,day2,day.night,vs,attend,cloud,sun,temp.c,temp.f,wind,note
1,6/5/2013,Wed,1,1,Kelowna,3026,mainly sunny,4,21,70,,Opening Night
2,6/6/2013,Thu,1,1,Kelowna,1082,mainly sunny,4,18,64,,
3,6/7/2013,Fri,3,1,Kelowna,1542,mainly sunny,4,19,66,windy,
4,6/11/2013,Tue,1,1,Medford,1014,mostly cloudy,2,17,63,,
5,6/12/2013,Wed,1,1,Medford,1003,mostly cloudy,2,16,60,,
6,6/13/2013,Thu,1,1,Medford,1015,partly cloudy,3,19,66,,
7,6/21/2013,Fri,3,1,Bend,1248,sunny,5,18,64,,
8,6/22/2013,Sat,3,1,Bend,1640,sunny,5,21,70,,
9,6/23/2013,Sun,2,0,Bend,1246,cloudy,1,18,64,,

## gist:6048590
# ######################
#
# Blog with output and discussion:
# "Fair weather fans? (An R scatter plot matrix)" 2013-07-18
# http://bayesball.blogspot.ca/2013/07/fair-weather-fans-r-scatter-plot-matrix.html
#
# data: pulled from www.harbourcats.com
# saved on Google Drive:
# https://docs.google.com/spreadsheet/ccc?key=0Art4wpcrwqkBdHZvTUFzOUo5U3BzMHFveXdYOTdTWUE&usp=sharing
# File / Download as > Comma Separated Values (CSV)

## gist:5802497
#
# for details see
# http://bayesball.blogspot.ca/2013/06/annotating-select-points-on-x-y-plot.html
#
# load the ggplot2 and grid packages
library(ggplot2)
library(grid)
# read data (note csv files are renamed)
# both files are looked up relative to the current working directory
tbl1 <- read.csv("FanGraphs_Leaderboard_h.csv")
tbl2 <- read.csv("FanGraphs_Leaderboard_d.csv")

## gist:5711584
# load the package and data set "Teams"
# install Lahman only when it is missing, so re-running the script does not
# download and reinstall the package every time
if (!requireNamespace("Lahman", quietly = TRUE)) {
  install.packages("Lahman")
}
library("Lahman")
# load the Teams data set into the workspace
data(Teams)
#
#
# CREATE LEAGUE SUMMARY TABLES
# ============================
#
# select a sub-set of teams from 1901 [the establishment of the American League] forward to 2012
	# CALCULATING PERCENTILES IN R
	#
	# a basic percentile function using "ecdf" [Empirical Cumulative Distribution Function]
	# using a data file "percentiledata" with variable VALUE
	#
	# ecdf() returns a function; calling that function on a value gives the
	# proportion of observations less than or equal to that value
	percentileFUN <- ecdf(percentiledata$VALUE)
	# print the ECDF object
	percentileFUN
	# percentile (as a proportion) of every observation in VALUE
	percentileFUN(percentiledata$VALUE)
	# write the percentile values to the source file as a new column "pctl"
	# (this must call percentileFUN, the function created above)
	percentiledata$pctl <- percentileFUN(percentiledata$VALUE)
	#
	# BINOMIAL CONFIDENCE INTERVAL CALCULATOR
	#
	# the binomial distribution is approximated by the Normal distribution (for large n)
	# http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval
	#
	# read the data file
	bin_data <- read.csv("bin_data.csv")
	#
	#
	# the binomial confidence calculator function
	# PRINT THE INTEGERS 1 THROUGH 10
	#
	# VERSION 1 -- using while()
	# make the initial assignment of variable count_1 to 0 (required: the while() condition reads count_1 before the first pass)
	count_1 <- 0
	# the while loop - conditional statement in the first parenthesis,
	# then the repeated steps within the {}
	while (count_1 < 10)
	{ count_1 <- count_1 + 1
	print(count_1)
	num,date,day,day2,day.night,vs,attend,cloud,sun,temp.c,temp.f,wind,note
	1,6/5/2013,Wed,1,1,Kelowna,3026,mainly sunny,4,21,70,,Opening Night
	2,6/6/2013,Thu,1,1,Kelowna,1082,mainly sunny,4,18,64,,
	3,6/7/2013,Fri,3,1,Kelowna,1542,mainly sunny,4,19,66,windy,
	4,6/11/2013,Tue,1,1,Medford,1014,mostly cloudy,2,17,63,,
	5,6/12/2013,Wed,1,1,Medford,1003,mostly cloudy,2,16,60,,
	6,6/13/2013,Thu,1,1,Medford,1015,partly cloudy,3,19,66,,
	7,6/21/2013,Fri,3,1,Bend,1248,sunny,5,18,64,,
	8,6/22/2013,Sat,3,1,Bend,1640,sunny,5,21,70,,
	9,6/23/2013,Sun,2,0,Bend,1246,cloudy,1,18,64,,
	# ######################
	#
	# Blog with output and discussion:
	# "Fair weather fans? (An R scatter plot matrix)" 2013-07-18
	# http://bayesball.blogspot.ca/2013/07/fair-weather-fans-r-scatter-plot-matrix.html
	#
	# data: pulled from www.harbourcats.com
	# saved on Google Drive:
	# https://docs.google.com/spreadsheet/ccc?key=0Art4wpcrwqkBdHZvTUFzOUo5U3BzMHFveXdYOTdTWUE&usp=sharing
	# File / Download as > Comma Separated Values (CSV)
	#
	# for details see
	# http://bayesball.blogspot.ca/2013/06/annotating-select-points-on-x-y-plot.html
	#
	# load the ggplot2 and grid packages
	library(ggplot2)
	library(grid)
	# read data (note csv files are renamed)
	# both files are looked up relative to the current working directory
	tbl1 <- read.csv("FanGraphs_Leaderboard_h.csv")
	tbl2 <- read.csv("FanGraphs_Leaderboard_d.csv")
	# load the package and data set "Teams"
	# install Lahman only when it is missing, so re-running the script does not
	# download and reinstall the package every time
	if (!requireNamespace("Lahman", quietly = TRUE)) {
	  install.packages("Lahman")
	}
	library("Lahman")
	# load the Teams data set into the workspace
	data(Teams)
	#
	#
	# CREATE LEAGUE SUMMARY TABLES
	# ============================
	#
	# select a sub-set of teams from 1901 [the establishment of the American League] forward to 2012