Alexander Shenkin ashenkin

## coauthor_list_generator.r
# Alexander Shenkin 2023.
# License: CC BY 4.0: https://creativecommons.org/licenses/by/4.0/.  TLDR; Share and adapt with attribution.
#
# This script reads in a csv file of publications, and produces a list of coauthors
# 1) To get the csv of publications, download a bibtex or other archive of desired pubs from ORCID or another publication search tool.
# Note: don't use Google Scholar!  They limit the number of co-authors exported.
# 2) Import that bibtex or other format archive into Zotero
# 3) Export those imported references in a .csv format.
#
# This does not include institutions of the coauthors, unfortunately.  That still has to be done manually.

## unscale.r
unscale <- function(scaled, scale, center) {
  # Undo a centering/scaling transform, returning scaled * scale + center.
  #
  # scaled: scaled values, e.g. the output of base::scale()
  # scale:  value(s) the data were divided by
  # center: value(s) that were subtracted from the data
  #
  # provide either scale & center, or a scaled vector with the proper attributes
  use_attrs <- missing(scale) || missing(center)
  if (use_attrs) {
    stopifnot(c("scaled:center", "scaled:scale") %in% names(attributes(scaled)))
    scale <- attr(scaled, "scaled:scale")
    center <- attr(scaled, "scaled:center")
    # strip the scaling attributes so the result is not mistaken for scaled data
    attr(scaled, "scaled:scale") <- NULL
    attr(scaled, "scaled:center") <- NULL
  }
  per_column <- use_attrs && is.matrix(scaled) && ncol(scaled) > 1 &&
    length(scale) == ncol(scaled) && length(center) == ncol(scaled)
  if (per_column) {
    # base::scale() stores one center/scale per column; plain `*` would recycle
    # those values down the rows instead, so apply them column-wise.
    unscaled <- sweep(sweep(scaled, 2, scale, "*"), 2, center, "+")
  } else {
    unscaled <- scaled * scale + center
  }
  unscaled
}

## set_dev_contrasts.r
contr.sum.keepnames <- function(...) {
    # make deviation contrasts that don't lose the names of the factors in the model results
    # from https://stackoverflow.com/questions/10808853/why-does-changing-contrast-type-change-row-labels-in-r-lm-summary
    #
    # ...: passed straight through to contr.sum()
    # Returns the contr.sum() matrix with its columns named after the
    # corresponding leading row (level) names.
    conS <- contr.sum(...)
    # Name by the actual column count: n - 1 columns for ordinary contrasts,
    # n columns when called with contrasts = FALSE (which previously errored).
    colnames(conS) <- rownames(conS)[seq_len(ncol(conS))]
    conS
}

set_dev_contrasts <- function(df, colname = "site") {
    # Set contrasts to "deviation coding" so site effects are as compared to overall mean across all sites.  I.e., sites together should have a mean 0 effect.

## predict_scaled_glmer.r
# We often fit LMMs/GLMMs with scaled variables.  However, making predictions using those models isn't straightforward (at least to me!)
# It turns out that you have to re-scale your prediction data using the same parameters used to scale your original data frame used to fit the model
# See below, and pay special attention to the section where the new data are rescaled.

library(lme4)
library(VGAM)

# number of simulated observations
reps <- 3000

# exponential draws, rescaled so that the largest value equals 100
dbh <- rexp(reps)
dbh <- dbh / max(dbh) * 100

## make_elevation_profile_swaths.r
# Thanks to Forrest Stevens.  Some of the code here borrowed from him here: https://github.com/ForrestStevens/Scratch/blob/master/swath_slices.R

library(raster)
library(rgdal)
library(sp)
library(rgeos)
library(gtools)
library(ggplot2)
library(plyr)
library(zoo)

## query_higher_taxa_classes.r
query_higher_taxa_classes <- function(species_list, known = "genus", order = c("dataframe", "unique_sp")) {
    # Pass in a character vector of species, genera, families, or whatever (the search is flexible)
    # Returns a dataframe with the columns: query, db, family, order, subdivision
    # The dataframe returned is guaranteed to be in the same order as the species list passed in if order is "dataframe"
    order = match.arg(order)

    library(taxize)
    library(plyr)
    species_list = sub("^([^ ]*).*$","\\1",species_list) # just take the top level name before the space
    # remove short names that clog the taxon query - replace later

## na.omit.somecols.r
na.omit.somecols <- function(data, noNAsInTheseCols, allOutputCols = names(data)) {
    # Drop the rows of `data` that have an NA in any of the columns named in
    # noNAsInTheseCols; NAs in other columns are left alone.
    # allOutputCols optionally restricts which columns are returned.
    # usage: na.omit.somecols(my_dataframe, c("col1", "col2"))
    mustBeComplete <- data[, noNAsInTheseCols]
    keepRow <- complete.cases(mustBeComplete)
    data[keepRow, allOutputCols]
}
	# Alexander Shenkin 2023.
	# License: CC BY 4.0: https://creativecommons.org/licenses/by/4.0/. TLDR; Share and adapt with attribution.
	#
	# This script reads in a csv file of publications, and produces a list of coauthors
	# 1) To get the csv of publications, download a bibtex or other archive of desired pubs from ORCID or another publication search tool.
	# Note: don't use Google Scholar! They limit the number of co-authors exported.
	# 2) Import that bibtex or other format archive into Zotero
	# 3) Export those imported references in a .csv format.
	#
	# This does not include institutions of the coauthors, unfortunately. That still has to be done manually.
	unscale <- function(scaled, scale, center) {
	  # Undo a centering/scaling transform, returning scaled * scale + center.
	  #
	  # scaled: scaled values, e.g. the output of base::scale()
	  # scale:  value(s) the data were divided by
	  # center: value(s) that were subtracted from the data
	  #
	  # provide either scale & center, or a scaled vector with the proper attributes
	  use_attrs <- missing(scale) || missing(center)
	  if (use_attrs) {
	    stopifnot(c("scaled:center", "scaled:scale") %in% names(attributes(scaled)))
	    scale <- attr(scaled, "scaled:scale")
	    center <- attr(scaled, "scaled:center")
	    # strip the scaling attributes so the result is not mistaken for scaled data
	    attr(scaled, "scaled:scale") <- NULL
	    attr(scaled, "scaled:center") <- NULL
	  }
	  per_column <- use_attrs && is.matrix(scaled) && ncol(scaled) > 1 &&
	    length(scale) == ncol(scaled) && length(center) == ncol(scaled)
	  if (per_column) {
	    # base::scale() stores one center/scale per column; plain `*` would recycle
	    # those values down the rows instead, so apply them column-wise.
	    unscaled <- sweep(sweep(scaled, 2, scale, "*"), 2, center, "+")
	  } else {
	    unscaled <- scaled * scale + center
	  }
	  unscaled
	}
	contr.sum.keepnames <- function(...) {
	  # make deviation contrasts that don't lose the names of the factors in the model results
	  # from https://stackoverflow.com/questions/10808853/why-does-changing-contrast-type-change-row-labels-in-r-lm-summary
	  #
	  # ...: passed straight through to contr.sum()
	  # Returns the contr.sum() matrix with its columns named after the
	  # corresponding leading row (level) names.
	  conS <- contr.sum(...)
	  # Name by the actual column count: n - 1 columns for ordinary contrasts,
	  # n columns when called with contrasts = FALSE (which previously errored).
	  colnames(conS) <- rownames(conS)[seq_len(ncol(conS))]
	  conS
	}

	set_dev_contrasts <- function(df, colname = "site") {
	# Set contrasts to "deviation coding" so site effects are as compared to overall mean across all sites. I.e., sites together should have a mean 0 effect.
	# We often fit LMM/GLMM's with scaled variables. However, making predictions using those models isn't straightforward (at least to me!)
	# It turns out that you have to re-scale your prediction data using the same parameters used to scale your original data frame used to fit the model
	# See below, and pay special attention to the section where the new data are rescaled.

	library(lme4)
	library(VGAM)

	# number of simulated observations
	reps <- 3000

	# exponential draws, rescaled so that the largest value equals 100
	dbh <- rexp(reps)
	dbh <- dbh / max(dbh) * 100
	# Thanks to Forrest Stevens. Some of the code here borrowed from him here: https://github.com/ForrestStevens/Scratch/blob/master/swath_slices.R

	library(raster)
	library(rgdal)
	library(sp)
	library(rgeos)
	library(gtools)
	library(ggplot2)
	library(plyr)
	library(zoo)
	query_higher_taxa_classes <- function(species_list, known = "genus", order = c("dataframe", "unique_sp")) {
	# Pass in a character vector of species, genera, families, or whatever (the search is flexible)
	# Returns a dataframe with the columns: query, db, family, order, subdivision
	# The dataframe returned is guaranteed to be in the same order as the species list passed in if order is "dataframe"
	order = match.arg(order)

	library(taxize)
	library(plyr)
	species_list = sub("^([^ ]*).*$","\\1",species_list) # just take the top level name before the space
	# remove short names that clog the taxon query - replace later
	na.omit.somecols <- function(data, noNAsInTheseCols, allOutputCols = names(data)) {
	  # Drop the rows of `data` that have an NA in any of the columns named in
	  # noNAsInTheseCols; NAs in other columns are left alone.
	  # allOutputCols optionally restricts which columns are returned.
	  # usage: na.omit.somecols(my_dataframe, c("col1", "col2"))
	  mustBeComplete <- data[, noNAsInTheseCols]
	  keepRow <- complete.cases(mustBeComplete)
	  data[keepRow, allOutputCols]
	}