# Last active August 6, 2019 12:25
## Curated stuff:
## * MetaCran:
## * List of Packages gathered by Garret G.:
## * List of popular packages
## * List of DataScience R tutorials
## * List of machine learning tutorials by subject:
## Reproducible package management for R:
# More info @
## great RStudio Addins
# Radiant is installed as a binary from its maintainers' own package
# repository. An empty `repos` string makes install.packages() fail.
install.packages(
  "radiant",
  repos = "https://radiant-rstats.github.io/minicran/",
  type = "binary"
)
install.packages("addinslist") # an addin that serves as addin browser and manager
install.packages("ggThemeAssist") # ggplot2 theme assistance

# The remaining addins only live on GitHub and are installed through devtools,
# so make sure devtools itself is available before calling into it.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
devtools::install_github("jennybc/jadd") # convenience functions for developing and debugging functions
devtools::install_github("tjmahr/WrapRmd") # conveniently wrap long Rmd text that contains inline R code
devtools::install_github("MangoTheCat/tidyshiny") # interactively manipulate data with tidyr using this shiny gadget

## great RStudio Markdown Templates
install.packages("rmdformats") # great templates for Rmd files
## great bioconductor packages
# EBImage - image processing: BiocManager::install("EBImage")
#   (formerly: source("https://bioconductor.org/biocLite.R"); biocLite("EBImage"))
## cran packages recommended by a blogger
# Character vector of CRAN package names recommended by a blogger, grouped by
# purpose. It is combined with `y` and passed to install.packages() further
# down in this script.
x <- c(
  ## Data Import and Manipulation
  "broom", # convert statistical analysis objects from R into tidy format
  "XML", # tools for parsing and generating XML
  "foreign", # functions for reading and writing data stored by Minitab, S, SAS, SPSS...
  "lubridate", # makes it easier to work with dates and times by providing functions to identify and parse date-time data
  "stringr", # makes it easier to work with strings
  "sqldf", # for running SQL statements on R data frames, optimized for convenience
  "RCurl", # general network (HTTP/HTTPS/FTP/...) client interface for R
  "rjson", # converts R objects into JSON objects and vice-versa
  "xlsx", # provides R functions to read/write/format Excel 2007 and Excel 97/2000/XP/2003 file formats
  "tidyr", # an evolution of reshape2. It's designed specifically for data tidying (not general reshaping or aggregating) and works well with dplyr data pipelines
  "dplyr", # a fast, consistent tool for working with data frame like objects, both in memory and out of memory
  "httr", # provides useful tools for working with HTTP
  "data.table", # fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all
  ## Exploratory Data Analysis and Visualization
  "ggplot2", # an implementation of the Grammar of Graphics
  "RColorBrewer", # provides palettes for drawing nice maps shaded according to a variable
  "ellipse", # functions for drawing ellipses and ellipse-like confidence regions
  "animation", # a gallery of animations in statistics and utilities to create animations
  "shiny" # elegant and powerful web framework for building interactive web applications using R
)
## my own installed packages on my system
# Character vector of the packages the author keeps installed, grouped by
# topic. Some names appear under more than one topic on purpose; the repeats
# are harmless because the vector is merged with `x` through union() before
# it is handed to install.packages().
y <- c(
  #### Graphics devices
  "svglite", # fast SVG graphics device - hadleyverse
  "car", # mostly used: car::recode
  ## testing, asserting and validating tools
  "validate", # awesome package for rule based checking of data, very sophisticated and mature
  "assertr", # assert tool especially optimized for data checking, plays well with dplyr
  "ensurer", # ensure values at runtime, plays well with dplyr
  "testthat", # R unit test framework "hadleyverse", could be also great for data checking
  "assertthat", # pre- and post-condition assertions "hadleyverse", could be also great for data checking
  ## knitr suite
  "knitr", # literate programming for R
  # "printr" # yet to be released to CRAN; companion package to knitr by its creator
  "pander", # an R Pandoc writer
  "sparkTable", # create sparklines, sparkhistograms, sparktables for shiny and knitr docs
  "formattable", # awesome package to conditionally format tables by renkun-ken
  ## Templates for RStudio/knitr R Markdown format
  "rmdformats", # html_clean and html_docco template
  ## Table generation packages
  "huxtable", ### my new favorite - htmlTable++
  "tangram", ### grammar of tables - from Hmisc creator
  "desctable", ### my new favorite - DescTools/tableone for the tidyverse
  "tableone", ### great for Table 1, works well for propensity score analytics
  "moonBook", # great for Table 1 creation for medical papers
  "ztable", # great for table creation for medical papers
  "compareGroups", ### great for Table 1
  "htmlTable", ### great for creating tables like in top medical journals, does not summarize, only helps with formatting
  "DescTools", ### perfect for descriptive stats, great tools for plotting and descriptive stats
  "stargazer", # great table output for model objects
  "pixiedust", # a grammar for formatting tables
  "tables", # has an interesting interface for producing summary statistics. It looks complex but powerful.
  ## Image processing
  "imager", # imager: an R package for image processing
  ## Shiny and Shiny addons -- check out
  "shiny", # essential - R web framework
  "flexdashboard", # easy interactive dashboards for R - works well with htmlwidgets
  "shinyAce", # code editor
  "shinyjs", # javascript in shiny made easy
  "shinythemes", # a collection of Shiny themes for the theme argument of bootstrapPage, fluidPage, navbarPage, or fixedPage
  "shinydashboard", # a dashboard framework, much more options than flexdashboard, but also more features
  ## htmlwidgets framework for JS/D3 widgets in Shiny and knitr
  "htmlwidgets", # htmlwidgets
  "trelliscopejs", # great for exploratory analysis
  "networkD3", # great for static and interactive network graphics
  "visNetwork", # great for static and interactive network graphics
  "rgl", # 3D plotting WebGL widget
  # # timelines in R
  "mindr", # mindmaps in markdown!
  "corrr", # tidy working with correlations
  ## data structures
  "data.tree", # general purpose hierarchical data structure
  ## Graph Database utilities
  "tidygraph", # verbs and a tidy API for working with graphs
  "igraph", # igraph is a collection of network analysis and viz tools
  "RNeo4j", # Neo4j R driver
  # OrientDB - ???
  "networkD3", # htmlwidget: great for static and interactive network graphics
  "visNetwork", # htmlwidget: great for static and interactive network graphics
  "ggraph", "ggnetwork", # ggplot2 extensions for graphs
  ## clustering
  "cluster", ## main functions: agnes and daisy
  "fpc", ## check validity of clusters: clusterboot function; kmeansruns
  "heatmaply", ## sophisticated heatmap plotting
  ## data reading and munging packages - tidyverse
  "janitor", # great for cleaning data
  "dplyr", # DSL for data work - hadleyverse
  "wrapr", # great for NSE with dplyr, simplifies NSE programming quite a lot!
  "tidyr", # great for data munging - hadleyverse
  "purrr", # functional programming for lists - hadleyverse
  "broom", # convert statistical analysis objects from R into tidy format
  "forcats", # working with factors
  "lubridate", # working with dates
  "magrittr", # %>%
  "jsonlite", # jsonlite: a robust, high performance JSON parser and generator for R
  "tidyjson", # a grammar for reading in deterministically json data into a data frame
  "yaml", # methods to convert R data to YAML and back; works well with data.tree package
  "readr", # great for data IO of text files - hadleyverse
  "readxl", # for data IO of excel files, still buggy and unreliable - hadleyverse
  "janitor", # must have when reading in excel files with the readxl package
  "openxlsx", # great for data IO of excel files
  "XLConnect", # for reading in Excel - at the moment the most reliable. Backbone: Java
  "rvest", # convenient webscraping - hadleyverse
  "haven", # great for data IO of SPSS, SAS, STATA im/export - hadleyverse
  "import", # from the creator of magrittr: great for importing
  "dummies", # for easy creation of dummy variables
  "feather", # high speed data reading and writing format
  "doMC", # for multi-core processing
  # categorical data analysis
  "vcd", # great for categorical data analysis
  "epibasix", # epi helper package
  "epicalc", # epi helper package (print_logistic), not maintained anymore
  "epiR", # epi helper package
  "epitools", # epi helper package
  # exact statistics
  "exact2x2", # exact methods
  "exactci", # exact methods
  ## ggplot2 and general plotting packages
  # A web page listing all ggplot2 extensions:
  "ggplot2", # see packages "cowplot" (plot_grid)
  "ggrepel", # extension: for direct labeling
  "ggforce", # extension: contextual zoom -> facet_zoom ; sinaplot -> geom_sina
  "ggbeeswarm", # extension: awesome beeswarm plots
  "gganimate", "tweenr", # extension: easy animations of ggplot2 plots
  "ggloop", # extension: create 'ggplot2' plots in a loop
  "ggthemes", "ggthemr", # great themes for ggplot2
  "ggsci", # plotting scales and palettes of scientific journals
  "ggTimeSeries", # great for time series or time related data! Innovative viz!
  "ggpubr", # easy 'ggplot2' based publication ready plots
  "viridis", # academically and aesthetically proven color palette
  # great plotting packages for special purposes
  "tabplot", # Tableplot, a visualization of large datasets
  "plotluck", # fantastic and crazy simple way of beautiful automatic plotting for exploratory purposes with formula interface
  "survminer", # ggplot2 based beautiful and 'ready-to-publish' survival curves with tables
  "rms", # survplot and estimates
  "survMisc", # also survival plotting
  "trelliscopejs", # great for exploratory analysis
  "GGally", # GGally - some special plots using ggplot2
  "likert", # great for visualizing likert scales
  "dotwhisker", # better than coefplot, visualizing regression coefficients - very nice!, plays well with dplyr and broom
  "visreg", # great for visualizing a regression result
  "corrplot", "corrgram", # visualizing correlation matrices
  "forestplot", # advanced forest plot using 'grid' graphics
  "venneuler", # creating Venn and Euler diagrams
  "dendextend", # great package for various viz of dendrograms
  "plotROC", # great for plotting ROC curves
  "DiagrammeR", # excellent diagram lib
  "ggfortify", # define fortify and autoplot functions to allow ggplot2 to handle some popular R packages
  "cowplot", # great for multi-panel figures with ggplot2 - plot_grid; save_ggplot3
  "ggdendro", "ggRandomForests",
  "ggmcmc", # has gg_pairs function
  "plotly", # good plots
  "ggvis", # next gen ggplot2, early dev
  "plotly", # plotly API - offline dynamic viz gen.
  "timeline", # timeline: timelines for a Grammar of Graphics
  "sjPlot", # package for plotting of forest plots and interaction terms of GLMs
  "moonBook", # for plotting forest plots and survival curves
  "scatterplot3d", # great 3d plotting library
  "plot3D", #
  "rgl", # 3D plotting including the WebGL htmlwidget
  "simmer", # discrete event simulation with nice viz; plays well with dplyr
  # Meta research - Meta Analysis - Open Science
  "MAVIS", # a shiny app for meta-analysis
  "metagear", # research synthesis tools for systematic reviews and meta-analysis
  "metafor", # comprehensive collection of functions for conducting meta-analyses in R
  "meta", # meta analysis package
  "compute.es", # compute effect sizes
  "MAVIS", # MAVIS: Meta Analysis via Shiny
  "minerva", # implementation of MIC
  # Survival Analysis
  "survMisc", # great survival misc functions
  "installr", # great tool for installing R and other necessary tools
  "lme4", # linear mixed effects
  "multcomp", # handling multiple comparison
  "lubridate", # easy working with dates - hadleyverse
  # Quality Control
  # Propensity Score tools
  # shiny::runGitHub("LaurenSamuels/VisualPruner")
  "optmatch", "twang",
  # Single/Multiple Imputation
  "VIM", # visualizing missing data patterns and imputing missing data with fast algorithms
  "mice", "Amelia", "mi", # multiple imputation tools, Amelia for longitudinal data
  "missMDA", # great for single imputation, however, unstable
  "mosaic", # some handy extension to stat functions, e.g. formula interface, etc.
  "packrat", # facilitates reproducible research - package versioning by RStudio
  "checkpoint", # facilitates reproducible research - package versioning by Revolution Analytics
  ## Building APIs with R
  "plumber", # easy creation of R webservice by just code decoration
  "fiery", "routr", # a web-server and web-socket server and routr for routing of HTTP and WebSocket in R, great for web service dev
  "jug", # small web development framework for R, make building APIs for your code as easy as possible
  ## Machine Learning and Statistical Modeling
  "h2o", # great machine learning library, AutoML
  "rpart", # decision trees
  "party", #
  "xgboost", # high speed gbm implementation
  "gbm", # Generalized Boosted Regression Models
  "glmnet", # lasso and elastic-net regularized generalized linear models
  "geepack", # GEE: alternative to Linear Mixed Models for correlated data i.e. time series, clustered cohorts, ...
  "contrast", "multcomp", # great for contrasts in linear models: contrast::contrast function and
  # multcomp::glht function. Like the lincom command in Stata
  "tree", # classification and regression trees
  "randomForest", # classification and regression based on a forest of trees using random inputs
  "mclust", # Normal Mixture Modeling for Model-Based Clustering, Classification, and Density Estimation
  "car", # Companion to Applied Regression. Esp. useful for ANOVA tables.
  "lme4", # linear mixed-effects models using S4 classes
  "mvtnorm", # multivariate Normal and t Distributions
  # specially useful:
  # * car::recode() --> for easy recoding of any variables
  # * car::linear.hypothesis() --> like 'lincom' command in Stata, alternative is survey::svycontrast()
  "rms", # regression modelling tools
  # specially useful:
  # * rms::datadist() --> like 'adjust' command in Stata
  ## Text mining, string distances, NLP
  "tm", # a framework for text mining applications within R
  "stringdist", # Approximate String Matching and String Distance Functions
  "pwr", # Power Analysis
  # rOpenScience and rOpenHealth Packages
  "rentrez", # talk with NCBI entrez using R
  "RISmed", # talk with NCBI entrez using R
  "rclinicaltrials", # an interface to ClinicalTrials.gov - fetches studies meta-data and study data
  "rcrossref", # crossref API
  "fulltext", # download fulltext articles using R
  "BerlinData", # BerlinData: easy access to Berlin related data
  "rHealthDataGov", # this package provides an R interface to the HealthData.gov data API
  "rsnps", # search and retrieve Single Nucleotide Polymorphism data from openSNP
  "gistr", # a light interface to GitHub's gists for R
  "gtrendsR", # R functions to perform and display Google Trends queries
  # Interfaces to the cloud / cloud services
  "googlesheets", # Google Spreadsheets R API
  "rdrop2", # programmatic interface to the 'Dropbox' API
  "Rmonkey", # a Survey Monkey R client
  # robust
  "robustbase", # lm and glm without relying on distributional assumptions
  "roxygen2", # intra source documentation - hadleyverse
  # databases -----------
  "RPostgreSQL", # interface to postgresql
  "MonetDBLite", # in-process database engine including dplyr backend - ultra-fast column-based storage
  "RSQLite", # in-process database engine including dplyr backend - rock-solid, row-based storage
  "stringr", # Hadley Wickham's string processing package
  "swirl" # interactive R tutorial
)
# Install every package named in either list; union() drops repeated names.
install.packages(pkgs = union(x = x, y = y))
## github package
# GitHub packages, each given as "owner/repository".
# NOTE(review): the closing parenthesis of this c() call and any statement
# that installs `z` are not visible in this part of the file - confirm they exist.
z <- c("google/CausalImpact", # Estimating causal effects in time series
"haozhu233/ezsummary", # Summary stats tables making use of dplyr
"trinker/wakefield" # wakefield is designed to quickly generate random data sets.
