Last active
March 1, 2018 13:25
-
-
Save huberflores/3b209da6e81a0cfe774d to your computer and use it in GitHub Desktop.
Common Commands to use in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###DO NOT delete header### | |
# | |
# author Huber Flores | |
# | |
#clear screen | |
> command | |
Ctrl + l | |
> | |
#close console | |
> quit() | |
#get the working directory | |
> getwd() | |
[1] "/home/huber/Desktop/TechnicalInformation/R/examples/loadData" | |
#set folder as working directory | |
> setwd("/path/") | |
#load a CSV file | |
#this command loads a comma-separated file | |
#if the file is separated by tabs, either read it with read.delim("data.txt") / read.csv("data.txt", sep = "\t"), or create a new file in OpenOffice and save it with the new delimiter | |
> mydata = read.csv("data.csv") # data.csv is a file located in the directory obtained with getwd() | |
> mydata | |
Col1 Col2 Col3 | |
1 10 a1 b1 | |
2 20 a2 b2 | |
3 30 a3 b3 | |
> | |
#search for command | |
> apropos("mean") | |
[1] "colMeans" ".colMeans" "kmeans" "mean" | |
[5] "mean.data.frame" "mean.Date" "mean.default" "mean.difftime" | |
[9] "mean.POSIXct" "mean.POSIXlt" "rowMeans" ".rowMeans" | |
[13] "weighted.mean" | |
#help for a specific command | |
> help(mean) | |
#ignore missing (NA) values when computing a function | |
> xbar = mean(mydata$X69.4, na.rm = TRUE) | |
#search for packages and install them | |
> install.packages() | |
#standard deviation | |
> sapply(mydata[2],sd) # Remember: sd(mydata) on a whole data frame is deprecated since R 2.14.0 (and defunct since R 3.0.0) | |
#sort table based on a specific column | |
> sort1.mydata <- mydata[order(mydata$columnname),] | |
#write data to a file | |
> write.csv(mydata, "file.csv") | |
#cut interval | |
> a <- runif(100) # a few values for example | |
> b <- cut(a, seq(from=0, to =1, by = 0.2)) | |
#search if a keyword is in a column and return a logical vector | |
# vector <- grepl("keyword", mydata$words) | |
#create a subset of the rows matching a specific keyword as a new data frame | |
> chess <- mydata[grepl("chess", mydata$key),] | |
#extract columns from data frame | |
> slice1 <- mydata[, c("CPU.cores", "Col.sample")] | |
#show labels/name of the columns | |
> colnames(mydata) | |
#clustering and plotting | |
#k-means | |
> | |
km <- kmeans(mycluster, centers=2, nstart=4) | |
plot(mycluster, col=km$cluster) | |
#hierarchical cluster (unsupervised) | |
> | |
d <- dist(mycluster) | |
hc <- hclust(d, method= "complete") | |
plot(hc) #dendrogram | |
rect.hclust(hc, k=4) #draw rectangles around the k=4 clusters in the dendrogram | |
#DBSCAN | |
> | |
library(fpc) #if package not installed, then install via install.packages("fpc") | |
db <- dbscan(mycluster, eps=.3, MinPts=6) | |
str(db) | |
plot(mycluster, col=db$cluster+1L) | |
#kNN (k-nearest neighbors) | |
> | |
#install.packages("class") #although it is installed by default | |
library(class) | |
A1 = c(0,0) | |
A2 = c(1,1) | |
... | |
B2 = c(5.5, 7) | |
train=rbind(A1,A2,A3, B1,B2,B3) | |
# [,1] [,2] | |
#A1 0.0 0 | |
#A2 1.0 1 | |
#... | |
#B2 5.5 7 | |
cl=factor(c(rep("A",3),rep("B",3))) # This means the train set has 6 rows (A1,A2,A3,B1,B2,B3) | |
test = c(3.5, 3.5) #test can also be a matrix - test = matrix(c(4,4,3,3,5,6,7,7), ncol=2, byrow=TRUE) | |
summary(knn(train, test, cl, k=1)) | |
#A B | |
#0 1 #classified in group B | |
#calculate percentiles | |
#calculates the 33rd, 57th and 95th percentiles | |
> appenergy <- mydata$ev | |
> quantile(appenergy, c(.33, .57, .95)) | |
#number of rows and cols | |
> nrow(mydata) | |
> ncol(mydata) | |
#select rows with a particular string | |
> myapp <- mydata[grep("chess", mydata$key),] | |
#select all the rows except the one with the keyword | |
> mycommunity <- mydata[!grepl("chess", mydata$key),] | |
#Run an R script (from the system shell, not the R console; inside R use source("file.R")) | |
> Rscript file.R | |
#ifthen and apply | |
> sapply(list, function(x) ifelse(is.numeric(x), mean(x), NA)) | |
#Iterate a list with name | |
> myhistory | |
$a1 | |
timestamp rtt acceleration userId | |
1 3 789 a1 user1 | |
2 3 586 a1 user2 | |
$a2 | |
timestamp rtt acceleration userId | |
3 3 748 a2 user3 | |
> | |
for (name in names(myhistory)) { | |
print(name) | |
print(myhistory[[name]]) | |
} | |
> | |
for (name in names(myhistory)) { | |
acc1 <- myhistory[[name]] #element of the list | |
acc2 <- myhistory[name] #new list based on the index of the element | |
print(acc1) | |
} | |
### Install package locally without online repository | |
install.packages("https://cran.r-project.org/src/contrib/Archive/actuar/actuar_1.2-0.tar.gz", repos=NULL, type="source") | |
#below are some errors you may encounter when trying the commands above | |
FAQ | |
==== | |
***Error 1*** | |
/usr/bin/ld: cannot find -lgfortran | |
/usr/bin/ld: cannot find -lquadmath | |
.... | |
[SOLVED]: first, figure out the version of the gfortran compiler you are using | |
> gfortran --version | |
GNU Fortran (Ubuntu/Linaro 4.7.2-5ubuntu1) 4.7.2 | |
Copyright (C) 2012 Free Software Foundation, Inc. | |
... | |
#Another way to check | |
> gfortran -print-file-name=libgfortran.so | |
/usr/lib/gcc/x86_64-linux-gnu/4.7/libgfortran.so | |
In order to solve this issue, the gcc, g++ and gfortran compilers MUST all be the same version. In other words, gcc and g++ should be 4.7 as well. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment