sdaza/myFunctions.R

## myFunctions.R
# MY R FUNCTIONS
# AUTHOR: SEBASTIAN DAZA
# Jun 02, 2014, 10:35 AM

# LOOKING FOR VARIABLES ####
lookvar  <- function(dat, varnames) {
  n  <- names(dat)

  nn  <- list()
    for (i in 1:length(varnames)) {
      nn[[i]]  <- grep(varnames[i],n)
    }

  nn  <- unlist(nn)

  if ( length(nn) >0 )
  {
    r  <- n[nn]
    return(r)
  }
  else
  { return("No variables found")}

}


# COLORS
map2color<-function(x,pal,limits=NULL){
    if(is.null(limits)) limits=range(x)
    pal[findInterval(x,seq(limits[1],limits[2],length.out=length(pal)+1), all.inside=TRUE)]
}

# LOAD AND INSTALL PACKAGES

ipak <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if (length(new.pkg))
    install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}

# transparent colors
makeTransparent = function(..., alpha=0.5) {

  if(alpha<0 | alpha>1) stop("alpha must be between 0 and 1")

  alpha = floor(255*alpha)
  newColor = col2rgb(col=unlist(list(...)), alpha=FALSE)

  .makeTransparent = function(col, alpha) {
    rgb(red=col[1], green=col[2], blue=col[3], alpha=alpha, maxColorValue=255)
  }

  newColor = apply(newColor, 2, .makeTransparent, alpha=alpha)

  return(newColor)

}

# FORMULA

formula <- function(var, symbol1, symbol2) {

# EXAMPLE ####
# formula(var, "~", "+")

  f <- as.formula(paste0(symbol1, paste0(paste0(var, collapse=symbol2))))
  return(f)
}

# REPLACE MISSING DATA
rmiss <-   function(data=DT, r=0) {
  # replace missing:
    for (i in names(DT))
        DT[is.na(get(i)),i:=r,with=FALSE]
}


# ASSIGN MISSINGS ####
assmis  <- function(dat, var, codes) {
# EXAMPLE ####
# dat <- assmis(dat, list(dvar), list(c(9,99)))

  if (sum(class(dat) %in% c("data.frame", "data.table"))>0) {

  pkgTest <- function(x) {

    is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])

    if (!is.installed(x))
    {
      install.packages(x)
      if(!require(x , character.only = TRUE)) stop("Package not found")
    }
    else {
      require(x, character.only = TRUE)
    }
  }

  pkgTest("data.table")

  if (class(var) %in% "list" & class(codes) %in%  "list"
      & (length(var)==length(codes) ))

  {

    dt  <- data.table(dat)

    for (i  in 1:length(var)) {

      for(j in 1:length(var[[i]])) {
        chari <-  paste0(var[[i]][j], ' %in% ' , 'codes[[', i, ']]')
        charj <-  paste0(var[[i]][j], ':= NA_integer_')
        dt[eval(parse(text=chari)), eval(parse(text=charj))]

      }
    }

    if (class(dat)[1]=="data.table") {
      return(r <- dt)
    }

    else if  (class(dat)[1]=="data.frame") {
      return(r  <- data.frame(dt))
    }
  }
  else {
    stop("Variables or codes are not defined as lists, or lists' elements are not the same")
  }
  }
      else{
        stop("The first object is not a data.frame or a data.table")
      }
}


# REVERSE SCALE FUNCTION

revscalev  <- function(dat, var, nvar) {

# EXAMPLE ####
# p <- revscale(dat, "var", "newvar")


  if (sum(class(dat) %in% c("data.frame", "data.table"))>0) {

  pkgTest <- function(x) {

    is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])

    if (!is.installed(x))
    {
      install.packages(x)
      if(!require(x , character.only = TRUE)) stop("Package not found")
    }
    else {
      require(x, character.only = TRUE)
    }
  }

  pkgTest("data.table")

  dt  <- data.table(dat)

  if (length(var)==length(nvar)) {

    for(n in 1:length(var)) {
      maxvalue  <- max(dt[, var[n], with=FALSE], na.rm=TRUE) + 1
      char <- paste0(nvar[n], ' := ', maxvalue, ' - ', var[n])
      dt[, eval(parse(text=char))]

    }
    if (class(dat)[1]=="data.table") {
      return(r <- dt)
    }

    else if  (class(dat)[1]=="data.frame") {
      return(r  <- data.frame(dt))
    }
}
  else {
    stop("Number of old and new variables is not same")
  }
}
  else{
    stop("The first object is not a data.frame or a data.table")
  }
}


# Z-Scores

zs <- function(x) (x - Mean (x) ) / Sd(x)

# zs <- function(dat, var) {

# if (sum(is.character(var))>0) {
# for (i in 1:length(var)) {
# char <- paste0("z", var[i], ' := (', var[i], "- Mean(", var[i], "))/Sd(", var[i],")")
# dat[, eval(parse(text=char))]
# }
# }
# else {
#   stop("Variables have to be strings")
# }
# }

# AS NUMERIC

asnumerics <- function(dat, cnames) {
  dat <- dat[ , cnames := lapply(dat[ , cnames, with=FALSE], as.numeric), with=FALSE]
}

# GET VARIABLES FROM SHEET ####

getVar <- function(addC = TRUE) {

  characterVector <- scan(pipe("pbpaste"), what = "character")

  formattedString <-    paste(characterVector , collapse="\", \"")
  formattedString <- paste("\"", formattedString, "\"", sep="")
  if (addC) formattedString <- paste("c(", formattedString , ")", sep = "")
  writeLines(formattedString, con = pipe("pbcopy"), sep = " ")
}

getNum <- function(addC = TRUE) {

  nums <- scan(pipe("pbpaste"), what = double())
  nums  <- paste(as.character(nums), collapse=", ")
  if (addC)  nums <- paste("c(", nums , ")", sep = "")
  writeLines(nums, con = pipe("pbcopy"), sep = " ")
}

# PASTE DATA ####

paste.data <- function(header=TRUE) {read.table(pipe("pbpaste"), header=header)}

# FUNCTIONS FOR TABLES

Mean <- function(x) as.numeric(mean(x, na.rm=TRUE))
Sd <- function(x) as.numeric(sd(x, na.rm=TRUE))
# N <- function(x) sum(!is.na(x))

Sum <- function(x) sum(x, na.rm=TRUE)

Sums <- function(x) {

if (sum(is.na(x))==length(x)) {
    y <- NA
    }

else {y <- sum(x, na.rm=TRUE)}

return(as.numeric(y))
  }


lrecode <- function(dat, var, newvar, old, new) {

# NEVER DEFINE A FIRST VALUE AS MISSING
# PUT MISSING VALUES AT THE END OF OLD/NEW SPECIFICATION

if (length(var)==length(newvar) & length(old)==length(new)) {

for (i in 1:length(var)) {

  for (h in 1:length(old)) {

  a <- paste0(var[i], "==", old[h])
  b <- paste0(newvar[i], ":= ", new[h])
  dat[eval(parse(text=a)), eval(parse(text=b))]

    }
 }
 return(dat)
}

else {
  stop("var or value list doesn't have the same size")

}
}


Max  <- function(x) {
  if (class(x)=="integer") {
  ifelse(all(is.na(x)), as.integer(NA) , max(x, na.rm = TRUE))
  }
  else {
  ifelse(all(is.na(x)), as.numeric(NA) , max(x, na.rm = TRUE))
  }
}

Min  <- function(x) {
  if (class(x)=="integer") {
  ifelse(all(is.na(x)), as.integer(NA) , min(x, na.rm = TRUE))
  }
  else {
  ifelse(all(is.na(x)), as.numeric(NA) , min(x, na.rm = TRUE))
  }
}

wtmean <- function(x, w) as.numeric(weighted.mean(x, w, na.rm = TRUE))

# REMOVE SPACE PDF FIGURES

savepdf <- function(file, width=16, height=10)
{
  fname <- paste0(file, ".pdf")
  pdf(fname, width=width/2.54, height=height/2.54,
      pointsize=10)
  par(mgp=c(2.2,0.45,0), tcl=-0.4, mar=c(3.3,3.6,1.1,1.1))
}

# LAGGED VARIABLES

rowShift <- function(x, shiftLen = 1L) {
  r <- (1L + shiftLen):(length(x) + shiftLen)
  r[r<1] <- NA
  return(x[r])
}

# shift <-function(x,shift_by){
#     stopifnot(is.numeric(shift_by))
#     stopifnot(is.numeric(x))

#     if (length(shift_by)>1)
#         return(sapply(shift_by,shift, x=x))

#     out<-NULL
#     abs_shift_by=abs(shift_by)
#     if (shift_by > 0 )
#         out<-c(tail(x,-abs_shift_by),rep(NA,abs_shift_by))
#     else if (shift_by < 0 )
#         out<-c(rep(NA,abs_shift_by), head(x,-abs_shift_by))
#     else
#         out<-x
#     out
# }


# lagvar <- function(dat, var, lag=-1, id) {

# if (sum(is.character(var) & sum(is.character(id)))>0) {

#   for (i in 1:length(var)) {
#   chari <- paste0("lag", abs(lag), "_", var[i], ' := shift(', var[i], ', ', lag, ')')
#   charj <- paste0("by='", id, "'")
#   dat[, eval(parse(text=chari)),  eval(parse(text=charj))]
# }
# }

# else {
#   stop("Variables have to be strings")
# }

# }

# FACTORS

factors <- function(dat, cnames) {
  dat <- dat[ , cnames := lapply(dat[ , cnames, with=FALSE], as.factor), with=FALSE]
}

# LAST VALUE

lastValue <- function(x)   tail(x[!is.na(x)], 1)

# FIRST VALID RECORD IN A VECTOR
firstValue <- function(x) {
if (any(!is.na(x))) {
  x <- na.omit(x)
  head(x, n=1)
}
else { NA }
}

# LAST VALID RECORD IN A VECTOR
lastValue <- function(x) {
if (any(!is.na(x))) {
  x <- na.omit(x)
  tail(x, n=1)
}
else { NA }
}

# NETWORK FUNCTION

GeodesicDistribution <- function (i, data, sims, period, groupName,
                       varName, levls=c(1:5,Inf), cumulative=TRUE, ...) {
 x <- networkExtraction(i, data, sims, period, groupName, varName)
 library(sna)
 a <- sna::geodist(x)$gdist
 if (cumulative)
 {
   gdi <- sapply(levls, function(i){ sum(a<=i) })
 }
else
 {
   gdi <- sapply(levls, function(i){ sum(a==i) })
 }
 names(gdi) <- as.character(levls)
 gdi
}

TriadCensus <- function(i, data, sims, wave, groupName, varName, levls=1:16){
       # unloadNamespace("igraph") # to avoid package clashes
       library(sna)
       library(network)
       x <- networkExtraction(i, data, sims, wave, groupName, varName)
       tc <- sna::triad.census(x)[1,levls]
       # triad names are transferred automatically
       tc
   }
	# MY R FUNCTIONS
	# AUTHOR: SEBASTIAN DAZA
	# Jun 02, 2014, 10:35 AM

	# LOOKING FOR VARIABLES ####
	lookvar <- function(dat, varnames) {
	n <- names(dat)

	nn <- list()
	for (i in 1:length(varnames)) {
	nn[[i]] <- grep(varnames[i],n)
	}

	nn <- unlist(nn)

	if ( length(nn) >0 )
	{
	r <- n[nn]
	return(r)
	}
	else
	{ return("No variables found")}

	}


	# COLORS
	map2color<-function(x,pal,limits=NULL){
	if(is.null(limits)) limits=range(x)
	pal[findInterval(x,seq(limits[1],limits[2],length.out=length(pal)+1), all.inside=TRUE)]
	}

	# LOAD AND INSTALL PACKAGES

	ipak <- function(pkg){
	new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
	if (length(new.pkg))
	install.packages(new.pkg, dependencies = TRUE)
	sapply(pkg, require, character.only = TRUE)
	}

	# transparent colors
	makeTransparent = function(..., alpha=0.5) {

	if(alpha<0 \| alpha>1) stop("alpha must be between 0 and 1")

	alpha = floor(255*alpha)
	newColor = col2rgb(col=unlist(list(...)), alpha=FALSE)

	.makeTransparent = function(col, alpha) {
	rgb(red=col[1], green=col[2], blue=col[3], alpha=alpha, maxColorValue=255)
	}

	newColor = apply(newColor, 2, .makeTransparent, alpha=alpha)

	return(newColor)

	}

	# FORMULA

	formula <- function(var, symbol1, symbol2) {

	# EXAMPLE ####
	# formula(var, "~", "+")

	f <- as.formula(paste0(symbol1, paste0(paste0(var, collapse=symbol2))))
	return(f)
	}

	# REPLACE MISSING DATA
	rmiss <- function(data=DT, r=0) {
	# replace missing:
	for (i in names(DT))
	DT[is.na(get(i)),i:=r,with=FALSE]
	}


	# ASSIGN MISSINGS ####
	assmis <- function(dat, var, codes) {
	# EXAMPLE ####
	# dat <- assmis(dat, list(dvar), list(c(9,99)))

	if (sum(class(dat) %in% c("data.frame", "data.table"))>0) {

	pkgTest <- function(x) {

	is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])

	if (!is.installed(x))
	{
	install.packages(x)
	if(!require(x , character.only = TRUE)) stop("Package not found")
	}
	else {
	require(x, character.only = TRUE)
	}
	}

	pkgTest("data.table")

	if (class(var) %in% "list" & class(codes) %in% "list"
	& (length(var)==length(codes) ))

	{

	dt <- data.table(dat)

	for (i in 1:length(var)) {

	for(j in 1:length(var[[i]])) {
	chari <- paste0(var[[i]][j], ' %in% ' , 'codes[[', i, ']]')
	charj <- paste0(var[[i]][j], ':= NA_integer_')
	dt[eval(parse(text=chari)), eval(parse(text=charj))]

	}
	}

	if (class(dat)[1]=="data.table") {
	return(r <- dt)
	}

	else if (class(dat)[1]=="data.frame") {
	return(r <- data.frame(dt))
	}
	}
	else {
	stop("Variables or codes are not defined as lists, or lists' elements are not the same")
	}
	}
	else{
	stop("The first object is not a data.frame or a data.table")
	}
	}



	# REVERSE SCALE FUNCTION

	revscalev <- function(dat, var, nvar) {

	# EXAMPLE ####
	# p <- revscale(dat, "var", "newvar")


	if (sum(class(dat) %in% c("data.frame", "data.table"))>0) {

	pkgTest <- function(x) {

	is.installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])

	if (!is.installed(x))
	{
	install.packages(x)
	if(!require(x , character.only = TRUE)) stop("Package not found")
	}
	else {
	require(x, character.only = TRUE)
	}
	}

	pkgTest("data.table")

	dt <- data.table(dat)

	if (length(var)==length(nvar)) {

	for(n in 1:length(var)) {
	maxvalue <- max(dt[, var[n], with=FALSE], na.rm=TRUE) + 1
	char <- paste0(nvar[n], ' := ', maxvalue, ' - ', var[n])
	dt[, eval(parse(text=char))]

	}
	if (class(dat)[1]=="data.table") {
	return(r <- dt)
	}

	else if (class(dat)[1]=="data.frame") {
	return(r <- data.frame(dt))
	}
	}
	else {
	stop("Number of old and new variables is not same")
	}
	}
	else{
	stop("The first object is not a data.frame or a data.table")
	}
	}


	# Z-Scores

	zs <- function(x) (x - Mean (x) ) / Sd(x)

	# zs <- function(dat, var) {

	# if (sum(is.character(var))>0) {
	# for (i in 1:length(var)) {
	# char <- paste0("z", var[i], ' := (', var[i], "- Mean(", var[i], "))/Sd(", var[i],")")
	# dat[, eval(parse(text=char))]
	# }
	# }
	# else {
	# stop("Variables have to be strings")
	# }
	# }

	# AS NUMERIC

	asnumerics <- function(dat, cnames) {
	dat <- dat[ , cnames := lapply(dat[ , cnames, with=FALSE], as.numeric), with=FALSE]
	}

	# GET VARIABLES FROM SHEET ####

	getVar <- function(addC = TRUE) {

	characterVector <- scan(pipe("pbpaste"), what = "character")

	formattedString <- paste(characterVector , collapse="\", \"")
	formattedString <- paste("\"", formattedString, "\"", sep="")
	if (addC) formattedString <- paste("c(", formattedString , ")", sep = "")
	writeLines(formattedString, con = pipe("pbcopy"), sep = " ")
	}

	getNum <- function(addC = TRUE) {

	nums <- scan(pipe("pbpaste"), what = double())
	nums <- paste(as.character(nums), collapse=", ")
	if (addC) nums <- paste("c(", nums , ")", sep = "")
	writeLines(nums, con = pipe("pbcopy"), sep = " ")
	}

	# PASTE DATA ####

	paste.data <- function(header=TRUE) {read.table(pipe("pbpaste"), header=header)}

	# FUNCTIONS FOR TABLES

	Mean <- function(x) as.numeric(mean(x, na.rm=TRUE))
	Sd <- function(x) as.numeric(sd(x, na.rm=TRUE))
	# N <- function(x) sum(!is.na(x))

	Sum <- function(x) sum(x, na.rm=TRUE)

	Sums <- function(x) {

	if (sum(is.na(x))==length(x)) {
	y <- NA
	}

	else {y <- sum(x, na.rm=TRUE)}

	return(as.numeric(y))
	}


	lrecode <- function(dat, var, newvar, old, new) {

	# NEVER DEFINE A FIRST VALUE AS MISSING
	# PUT MISSING VALUES AT THE END OF OLD/NEW SPECIFICATION

	if (length(var)==length(newvar) & length(old)==length(new)) {

	for (i in 1:length(var)) {

	for (h in 1:length(old)) {

	a <- paste0(var[i], "==", old[h])
	b <- paste0(newvar[i], ":= ", new[h])
	dat[eval(parse(text=a)), eval(parse(text=b))]

	}
	}
	return(dat)
	}

	else {
	stop("var or value list doesn't have the same size")

	}
	}





	Max <- function(x) {
	if (class(x)=="integer") {
	ifelse(all(is.na(x)), as.integer(NA) , max(x, na.rm = TRUE))
	}
	else {
	ifelse(all(is.na(x)), as.numeric(NA) , max(x, na.rm = TRUE))
	}
	}

	Min <- function(x) {
	if (class(x)=="integer") {
	ifelse(all(is.na(x)), as.integer(NA) , min(x, na.rm = TRUE))
	}
	else {
	ifelse(all(is.na(x)), as.numeric(NA) , min(x, na.rm = TRUE))
	}
	}

	wtmean <- function(x, w) as.numeric(weighted.mean(x, w, na.rm = TRUE))

	# REMOVE SPACE PDF FIGURES

	savepdf <- function(file, width=16, height=10)
	{
	fname <- paste0(file, ".pdf")
	pdf(fname, width=width/2.54, height=height/2.54,
	pointsize=10)
	par(mgp=c(2.2,0.45,0), tcl=-0.4, mar=c(3.3,3.6,1.1,1.1))
	}

	# LAGGED VARIABLES

	rowShift <- function(x, shiftLen = 1L) {
	r <- (1L + shiftLen):(length(x) + shiftLen)
	r[r<1] <- NA
	return(x[r])
	}

	# shift <-function(x,shift_by){
	# stopifnot(is.numeric(shift_by))
	# stopifnot(is.numeric(x))

	# if (length(shift_by)>1)
	# return(sapply(shift_by,shift, x=x))

	# out<-NULL
	# abs_shift_by=abs(shift_by)
	# if (shift_by > 0 )
	# out<-c(tail(x,-abs_shift_by),rep(NA,abs_shift_by))
	# else if (shift_by < 0 )
	# out<-c(rep(NA,abs_shift_by), head(x,-abs_shift_by))
	# else
	# out<-x
	# out
	# }


	# lagvar <- function(dat, var, lag=-1, id) {

	# if (sum(is.character(var) & sum(is.character(id)))>0) {

	# for (i in 1:length(var)) {
	# chari <- paste0("lag", abs(lag), "_", var[i], ' := shift(', var[i], ', ', lag, ')')
	# charj <- paste0("by='", id, "'")
	# dat[, eval(parse(text=chari)), eval(parse(text=charj))]
	# }
	# }

	# else {
	# stop("Variables have to be strings")
	# }

	# }

	# FACTORS

	factors <- function(dat, cnames) {
	dat <- dat[ , cnames := lapply(dat[ , cnames, with=FALSE], as.factor), with=FALSE]
	}

	# LAST VALUE

	lastValue <- function(x) tail(x[!is.na(x)], 1)

	# FIRST VALID RECORD IN A VECTOR
	firstValue <- function(x) {
	if (any(!is.na(x))) {
	x <- na.omit(x)
	head(x, n=1)
	}
	else { NA }
	}

	# LAST VALID RECORD IN A VECTOR
	lastValue <- function(x) {
	if (any(!is.na(x))) {
	x <- na.omit(x)
	tail(x, n=1)
	}
	else { NA }
	}

	# NETWORK FUNCTION

	GeodesicDistribution <- function (i, data, sims, period, groupName,
	varName, levls=c(1:5,Inf), cumulative=TRUE, ...) {
	x <- networkExtraction(i, data, sims, period, groupName, varName)
	library(sna)
	a <- sna::geodist(x)$gdist
	if (cumulative)
	{
	gdi <- sapply(levls, function(i){ sum(a<=i) })
	}
	else
	{
	gdi <- sapply(levls, function(i){ sum(a==i) })
	}
	names(gdi) <- as.character(levls)
	gdi
	}

	TriadCensus <- function(i, data, sims, wave, groupName, varName, levls=1:16){
	# unloadNamespace("igraph") # to avoid package clashes
	library(sna)
	library(network)
	x <- networkExtraction(i, data, sims, wave, groupName, varName)
	tc <- sna::triad.census(x)[1,levls]
	# triad names are transferred automatically
	tc
	}