Skip to content

Instantly share code, notes, and snippets.

@huberflores
Last active March 1, 2018 13:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save huberflores/3b209da6e81a0cfe774d to your computer and use it in GitHub Desktop.
Save huberflores/3b209da6e81a0cfe774d to your computer and use it in GitHub Desktop.
Common Commands to use in R
###DO NOT delete header###
#
# author Huber Flores
#
#clear screen
> command
Ctrl + l
>
#close console
> quit()
#get the working directory
> getwd()
[1] "/home/huber/Desktop/TechnicalInformation/R/examples/loadData"
#set folder as working directory
> setwd("/path/")
#load a CSV file
#the command loads a comma-separated file
#if the file is separated by tabs, then either create a new file in OpenOffice and save it with the new delimiter, or load it directly with read.delim("data.tsv")
> mydata = read.csv("data.csv") # data.csv is a file located in the directory obtained with getwd()
> mydata
Col1 Col2 Col3
1 10 a1 b1
2 20 a2 b2
3 30 a3 b3
>
#search for command
> apropos("mean")
[1] "colMeans" ".colMeans" "kmeans" "mean"
[5] "mean.data.frame" "mean.Date" "mean.default" "mean.difftime"
[9] "mean.POSIXct" "mean.POSIXlt" "rowMeans" ".rowMeans"
[13] "weighted.mean"
#help for a specific command
> help(mean)
#avoid non-readable values before calculating a function
> xbar = mean(mydata$X69.4, na.rm = TRUE) # na.rm = TRUE drops missing (NA) values before computing the mean
#search for packages and install them
> install.packages()
#standard deviation
> sapply(mydata[2],sd) # Remember sd(mydata) is deprecated already for higher versions of R > 2.14.0
#sort table based on a specific column
> sort1.mydata <- mydata[order(mydata$columnname),]
#write data to a file
> write.csv(mydata, "file.csv")
#cut interval
> a <- runif(100) # a few values for example
> b <- cut(a, seq(from=0, to =1, by = 0.2))
#search if a keyword is in a column and return a logical vector
# vector <- grepl("keyword", mydata$words)
#create a subset of the rows matching a specific keyword as a new data frame
> chess <- mydata[grepl("chess", mydata$key),]
#extract columns from data frame
> slice1 <- mydata[, c("CPU.cores", "Col.sample")]
#show labels/name of the columns
> colnames(mydata)
#clustering and plotting
#k-means
>
km <- kmeans(mycluster, centers=2, nstart=4)
plot(mycluster, col=km$cluster)
#hierarchical cluster (unsupervised)
>
d <- dist(mycluster)
hc <- hclust(d, method= "complete")
plot(hc) #dendrogram
rect.hclust(hc, k=4) #draw rectangles around the 4 clusters in the dendrogram
#DBSCAN
>
library(fpc) #if package not installed, then install via install.packages("fpc")
db <- dbscan(mycluster, eps=.3, MinPts=6)
str(db)
plot(mycluster, col=db$cluster+1L)
#kNN (k-nearest neighbors)
>
#install.packages("class") #Although it is installed by default
library(class)
A1 = c(0,0)
A2 = c(1,1)
...
B2 = c(5.5, 7)
train=rbind(A1,A2,A3, B1,B2,B3)
# [,1] [,2]
#A1 0.0 0
#A2 1.0 1
#...
#B2 5.5 7
cl=factor(c(rep("A",3),rep("B",3))) # This means the train set has 6 rows (A1,A2,A3,B1,B2,B3)
test = c(3.5, 3.5) #test can be also a Matrix - test = matrix (c(4,4,3,3,5,6,7,7), ncol=2, byrow=TRUE)
summary(knn(train, test, cl, k=1))
#A B
#0 1 #classified in group B
#calculate percentiles
#calculates 33rd, 57th and 95th percentiles
> appenergy <- mydata$ev
> quantile(appenergy, c(.33, .57, .95))
#number of rows and cols
> nrow(mydata)
> ncol(mydata)
#select rows with a particular string
> myapp <- mydata[grep("chess", mydata$key),]
#select all the rows except the one with the keyword
> mycommunity <- mydata[!grepl("chess", mydata$key),]
#Run an R script (from the system shell, not the R console; inside R use source("file.R"))
> Rscript file.R
#if-else and apply
> sapply(list, function(x) ifelse(is.numeric(x), mean(x), NA))
#Iterate a list with name
> myhistory
$a1
timestamp rtt acceleration userId
1 3 789 a1 user1
2 3 586 a1 user2
$a2
timestamp rtt acceleration userId
3 3 748 a2 user3
>
for (name in names(myhistory)) {
print(name)
print(myhistory[[name]])
}
>
for (name in names(myhistory)) {
acc1 <- myhistory[[name]] #element of the list
acc2 <- myhistory[name] #new list based on the index of the element
print(acc1)
print(acc2)
}
### Install package locally without online repository
install.packages("https://cran.r-project.org/src/contrib/Archive/actuar/actuar_1.2-0.tar.gz", repos=NULL, type="source")
#here you can find some potential errors found by trying the commands above
FAQ
====
***Error 1***
/usr/bin/ld: cannot find -lgfortran
/usr/bin/ld: cannot find -lquadmath
....
[SOLVED]: first, figure out the version of the gfortran compiler you are using
> gfortran --version
GNU Fortran (Ubuntu/Linaro 4.7.2-5ubuntu1) 4.7.2
Copyright (C) 2012 Free Software Foundation, Inc.
...
#Another way to check
> gfortran -print-file-name=libgfortran.so
/usr/lib/gcc/x86_64-linux-gnu/4.7/libgfortran.so
To solve this issue, the gcc and g++ compilers MUST be the same version as gfortran. In other words, gcc and g++ should be 4.7 as well.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment