View SOM example.R
#SOM example using wines data set | |
library(kohonen) | |
data(wines) | |
set.seed(7) | |
#create SOM grid | |
sommap <- som(scale(wines), grid = somgrid(2, 2, "hexagonal")) | |
## use hierarchical clustering to cluster the codebook vectors | |
groups<-3 |
View example.R
#initialize | |
library(shiny) | |
library(ggplot2) | |
library(purrr) | |
library(dplyr) | |
#example data | |
data(iris) |
View plotly_select_DT.R
#plotly box or lasso select linked to | |
# DT data table | |
# using Wage data | |
# the out group: is sex:Male, region:Middle Atlantic + | |
library(ggplot2) | |
library(plotly) | |
library(dplyr) | |
library(ISLR) |
View gist:d15aedea295f32fa43d76b0a864c577b
DATA SCIENCE EXERCISE | |
The following challenge requires the beer reviews data set called beer_reviews.csv. This data set can be downloaded from the following site: https://data.world/socialmediadata/beeradvocate . Note you can create a free temporary account to download this .csv. | |
Questions to answer using this data: | |
Which brewery produces the strongest beers by ABV%? | |
If you had to pick 3 beers to recommend using only this data, which would you pick? | |
Which of the factors (aroma, taste, appearance, palate) are most important in determining the overall quality of a beer? | |
Additional math/coding question unrelated to the data: |
View Orthogonal Signal Correction (OSC) for PLS models OSC-PLS (OPLS)
#see updated code base and some examples in the function "test" | |
# https://github.com/dgrapov/devium/blob/master/R/Devium%20PLS%20%20and%20OPLS.r | |
#Orthogonal Signal Correction for PLS models (OPLS) | |
#adapted from an example in the book <a href="http://www.springer.com/life+sciences/systems+biology+an+bioinfomatics/book/978-3-642-17840-5">"Chemometrics with R"</a> by Ron Wehrens | |
#this code requires the following packages: | |
need.packages<-c("pls", # to generate PLS models | |
"ggplot2" ) # to plot results |
View global.R
#check for and/or install dependencies | |
need<-c("RCurl","ggplot2","gridExtra","reshape2") | |
for(i in 1:length(need)){ | |
if(require(need[i], character.only = TRUE)==FALSE){ install.packages(need[i]);library(need[i], character.only = TRUE)} else { library(need[i],character.only = TRUE)} | |
} | |
if(require(pcaMethods)==FALSE){ | |
need<-c('Rcpp', 'rJava', | |
'Matrix', 'cluster', 'foreign', 'lattice', 'mgcv', 'survival') | |
for(i in 1:length(need)){ |
View replace_in.R
> in | |
Error: unexpected 'in' in "in" |
View pca.R
# Basic PCA example | |
# use www.createdatasol.com for | |
# an advanced user interface | |
#required packages for plotting | |
library(ggplot2) | |
library(ggrepel) | |
#load data | |
data<-read.csv('~/Sampledata.csv', |
View Andrews plots and encoding.r
#needed packages | |
library(andrews) | |
library(ggplot2) | |
#get some data; here I use "mtcars" | |
data<-mtcars | |
fct<-as.factor(mtcars$cyl) # factor for initial grouping | |
#get andrews encoding (x and y coords) |
View tanimoto.R
#' @title fast_tanimoto | |
#' @param mat matrix or data frame of numeric values | |
#' @param output 'matrix' (default) or 'edge list' (non-redundant and undirected) | |
#' @param progress TRUE, show progress | |
#' @import reshape2 | |
fast_tanimoto<-function(mat,output='matrix',progress=TRUE){ | |
mat[is.na(mat)]<-0 | |
#scoring function | |
score<-function(x){sum(x==2)/sum(x>0)} |
NewerOlder