Dmitry Grapov dgrapov

## covariate_adjust.R
#get linear model residuals
#' @import dplyr
#' @export
dave_lm_adjust<-function(data,formula,test_vars,adjust=TRUE,progress=TRUE){

  if (progress == TRUE){ pb <- txtProgressBar(min = 0, max = ncol(data), style = 3)} else {pb<-NULL}

  out <- lapply(1:length(test_vars), function(i) {
    if (progress == TRUE) {
      setTxtProgressBar(pb, i)

## Orthogonal Signal Correction (OSC) for PLS models OSC-PLS (OPLS)
#see updated code base and some examples in the function "test"
# https://github.com/dgrapov/devium/blob/master/R/Devium%20PLS%20%20and%20OPLS.r

#Orthogonal Signal Correction for PLS models (OPLS)
#adapted from an example in the book <a href="http://www.springer.com/life+sciences/systems+biology+an+bioinfomatics/book/978-3-642-17840-5">"Chemometrics with R by Ron Wehrens"</a>

#this code requires the following packages:
need.packages<-c("pls", # to generate PLS models
"ggplot2" ) # to plot results

## example.R
#initialize
library(shiny)
library(ggplot2)
library(purrr)
library(dplyr)


#example data
data(iris)

## SOM example.R
#SOM example using wines data set
library(kohonen)
data(wines)
set.seed(7)

#create SOM grid
sommap <- som(scale(wines), grid = somgrid(2, 2, "hexagonal"))

## use hierarchical clustering to cluster the codebook vectors
groups<-3

## example.R
library(reshape2)

gen.mat.to.edge.list<-function(mat,symmetric=TRUE,diagonal=FALSE,text=FALSE){
	#create edge list from matrix
	# if symmetric duplicates are removed
	mat<-as.matrix(mat)
	id<-is.na(mat) # used to allow missing
	mat[id]<-"nna"
	if(symmetric){mat[lower.tri(mat)]<-"na"} # use to allow missing values
	if(!diagonal){diag(mat)<-"na"}

## global.R
#initialize
library(datasets)
library(ggplot2)

#helper function (convert vector to named list)
# Every element of `vec` becomes one list entry, and each entry is named
# after the character form of the flattened input values.
namel <- function(vec) {
  stats::setNames(as.list(vec), as.character(unlist(vec)))
}

## global.R
#check for and/or install dependencies
# Attach every package listed in `need`; a package that cannot be attached is
# installed first and then attached.
need <- c("RCurl", "ggplot2", "gridExtra", "reshape2")
for (i in seq_along(need)) {
  # require() attaches the package and returns TRUE on success, so a second
  # library() call is only needed after a fresh install.
  if (!require(need[i], character.only = TRUE)) {
    install.packages(need[i])
    library(need[i], character.only = TRUE)
  }
}

if(require(pcaMethods)==FALSE){
  need<-c('Rcpp', 'rJava',
  'Matrix', 'cluster', 'foreign', 'lattice', 'mgcv', 'survival')
for(i in 1:length(need)){

## plotly_select_DT.R
#plotly box or lasso select linked to
# DT data table
# using Wage data
# the out group: is sex:Male, region:Middle Atlantic +


library(ggplot2)
library(plotly)
library(dplyr)
library(ISLR)

## dplyr_tutorial.Rmd
---
title: "Hands-on with dplyr"
author: "Dmitry Grapov"
output:
  html_document:
    keep_md: yes
---

## Introduction

## gist:d15aedea295f32fa43d76b0a864c577b
DATA SCIENCE EXERCISE

The following challenge requires the beer reviews data set, beer_reviews.csv, which can be downloaded from the following site: https://data.world/socialmediadata/beeradvocate . Note that you can create a free temporary account to download this .csv file.

Questions to answer using this data:
Which brewery produces the strongest beers by ABV%?
If you had to pick 3 beers to recommend using only this data, which would you pick?
Which of the factors (aroma, taste, appearance, palate) are most important in determining the overall quality of a beer?

Additional math/coding question unrelated to the data:
	#get linear model residuals
	#' @import dplyr
	#' @export
	dave_lm_adjust<-function(data,formula,test_vars,adjust=TRUE,progress=TRUE){

	if (progress == TRUE){ pb <- txtProgressBar(min = 0, max = ncol(data), style = 3)} else {pb<-NULL}

	out <- lapply(1:length(test_vars), function(i) {
	if (progress == TRUE) {
	setTxtProgressBar(pb, i)
	#see updated code base and some examples in the function "test"
	# https://github.com/dgrapov/devium/blob/master/R/Devium%20PLS%20%20and%20OPLS.r

	#Orthogonal Signal Correction for PLS models (OPLS)
	#adapted from an example in the book <a href="http://www.springer.com/life+sciences/systems+biology+an+bioinfomatics/book/978-3-642-17840-5">"Chemometrics with R by Ron Wehrens"</a>

	#this code requires the following packages:
	need.packages<-c("pls", # to generate PLS models
	"ggplot2" ) # to plot results
	#initialize
	library(shiny)
	library(ggplot2)
	library(purrr)
	library(dplyr)


	#example data
	data(iris)
	#SOM example using wines data set
	library(kohonen)
	data(wines)
	set.seed(7)

	#create SOM grid
	sommap <- som(scale(wines), grid = somgrid(2, 2, "hexagonal"))

	## use hierarchical clustering to cluster the codebook vectors
	groups<-3
	library(reshape2)

	gen.mat.to.edge.list<-function(mat,symmetric=TRUE,diagonal=FALSE,text=FALSE){
	#create edge list from matrix
	# if symmetric duplicates are removed
	mat<-as.matrix(mat)
	id<-is.na(mat) # used to allow missing
	mat[id]<-"nna"
	if(symmetric){mat[lower.tri(mat)]<-"na"} # use to allow missing values
	if(!diagonal){diag(mat)<-"na"}
	#initialize
	library(datasets)
	library(ggplot2)

	#helper function (convert vector to named list)
	# Every element of `vec` becomes one list entry, and each entry is named
	# after the character form of the flattened input values.
	namel <- function(vec) {
	  stats::setNames(as.list(vec), as.character(unlist(vec)))
	}
	#check for and/or install dependencies
	# Attach every package listed in `need`; a package that cannot be attached is
	# installed first and then attached.
	need <- c("RCurl", "ggplot2", "gridExtra", "reshape2")
	for (i in seq_along(need)) {
	  # require() attaches the package and returns TRUE on success, so a second
	  # library() call is only needed after a fresh install.
	  if (!require(need[i], character.only = TRUE)) {
	    install.packages(need[i])
	    library(need[i], character.only = TRUE)
	  }
	}

	if(require(pcaMethods)==FALSE){
	need<-c('Rcpp', 'rJava',
	'Matrix', 'cluster', 'foreign', 'lattice', 'mgcv', 'survival')
	for(i in 1:length(need)){
	#plotly box or lasso select linked to
	# DT data table
	# using Wage data
	# the out group: is sex:Male, region:Middle Atlantic +


	library(ggplot2)
	library(plotly)
	library(dplyr)
	library(ISLR)
	---
	title: "Hands-on with dplyr"
	author: "Dmitry Grapov"
	output:
	html_document:
	keep_md: yes
	---

	## Introduction
	DATA SCIENCE EXERCISE

	The following challenge requires the beer reviews data set, beer_reviews.csv, which can be downloaded from the following site: https://data.world/socialmediadata/beeradvocate . Note that you can create a free temporary account to download this .csv file.

	Questions to answer using this data:
	Which brewery produces the strongest beers by ABV%?
	If you had to pick 3 beers to recommend using only this data, which would you pick?
	Which of the factors (aroma, taste, appearance, palate) are most important in determining the overall quality of a beer?

	Additional math/coding question unrelated to the data: