##1 Data manipulation
-
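If a column contains missing-value markers that R does not recognize as NA, R may read the column as a factor instead of numeric. Convert it through character so you get the original values rather than the factor's integer codes: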
DATA$COLUMN <- as.numeric(as.character(DATA$COLUMN))
-
Rename the column:
names(DATA)[2] <- "NEW_NAME"
# Box plot of the Word column grouped by Type, with an explicit category order
# on the x axis. Requires ggplot2 to be attached.
sample <- read.table("~/Dropbox/sample.txt", header = TRUE, sep = "\t")
p <- ggplot(sample)
# Notched boxes, one per Type, with outlier points hidden. The fill legend is
# suppressed because it would only repeat the x-axis labels.
# Note: scale limits drop values outside 0-30 before the box statistics are
# computed (unlike coord_cartesian(), which only zooms).
p <- p +
  geom_boxplot(
    aes(x = factor(Type), y = Word, fill = factor(Type)),
    notch = TRUE,
    outlier.shape = NA
  ) +
  theme(
    axis.text.x = element_text(angle = 15, hjust = 0.8, vjust = 1, size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  guides(fill = "none") +
  scale_fill_grey() +
  scale_x_discrete(
    limits = c(
      "NSR", "stock-related", "NTR", "ticker-related", "NEG", "NEU", "POS"
    )
  ) +
  scale_y_continuous(limits = c(0, 30))
print(p)
# Box plot of the Word column grouped by Type, using the default (alphabetical)
# category order on the x axis. Requires ggplot2 to be attached.
sample <- read.table("~/Dropbox/sample.txt", header = TRUE, sep = "\t")
p <- ggplot(sample)
# Notched boxes, one per Type, with outlier points hidden. The fill legend is
# suppressed because it would only repeat the x-axis labels.
# Note: scale limits drop values outside 0-30 before the box statistics are
# computed (unlike coord_cartesian(), which only zooms).
p <- p +
  geom_boxplot(
    aes(x = factor(Type), y = Word, fill = factor(Type)),
    notch = TRUE,
    outlier.shape = NA
  ) +
  theme(
    axis.text.x = element_text(angle = 15, hjust = 0.8, vjust = 1, size = 12),
    axis.text.y = element_text(size = 12)
  ) +
  guides(fill = "none") +
  scale_fill_grey() +
  scale_y_continuous(limits = c(0, 30))
print(p)
##1 Data manipulation
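If a column contains missing-value markers that R does not recognize as NA, R may read the column as a factor instead of numeric. Convert it through character so you get the original values rather than the factor's integer codes: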
DATA$COLUMN <- as.numeric(as.character(DATA$COLUMN))
Rename the column:
names(DATA)[2] <- "NEW_NAME"
#!/usr/bin/env bash
## This script is for crawling tweets with a specific address file.

# Download directory, named after the download date and time (dd-mm-yy-HH:MM).
DIR="PARENT_DIR/$(date "+%d-%m-%y-%H:%M")"
# Create the download directory, including any missing parents.
mkdir -p "$DIR"
# Fetch the URLs listed in EXTERNAL_ADDRESS_LIST into $DIR: recursive (-r) to
# depth 1 (-l1), never ascending to the parent directory (-np), and skipping
# files that are not newer than the local copy (-N).
wget -i EXTERNAL_ADDRESS_LIST -np -r -N -l1 -P "$DIR"
# Auto- and cross-correlation of columns 2-5 of geQuote, melted into long
# format for plotting. `melt` must be provided by a reshaping package that is
# attached elsewhere.
m.geQuote <- as.matrix(geQuote[, 2:5])
# lag.max is spelled out instead of relying on partial matching of `lag`;
# na.contiguous keeps the longest stretch of rows without missing values.
acf.geQuote <- acf(
  m.geQuote,
  lag.max = 5,
  plot = FALSE,
  na.action = na.contiguous
)
m.acf.geQuote <- melt(acf.geQuote$acf)
# Inspect the structure of the acf result.
str(acf.geQuote)
# Console output recorded from one run:
# List of 6
# $ acf : num [1:5, 1:4, 1:4] 1 -0.1917 -0.478 0.1049 0.0648 ...
# $ type : chr "correlation"
# $ n.used: int 5
# $ lag : num [1:5, 1:4, 1:4] 0 1 2 3 4 0 -1 -2 -3 -4 ...
# $ series: chr "m.geQuote"
# $ snames: chr [1:4] "Open" "Close" "Low" "High"
# - attr(*, "class")= chr "acf"
# Raster plot of the melted correlation array: Var1 on x, Var2 on y, one facet
# per Var3 value, with the cell fill showing the correlation value.
# Requires ggplot2 to be attached.
p <- ggplot(m.acf.geQuote)
# The original mapping also passed a misspelled `lable = value`; geom_raster
# has no label aesthetic, so it is dropped rather than renamed.
p <- p +
  geom_raster(aes(x = Var1, y = Var2, fill = value)) +
  facet_wrap(~Var3, nrow = 4) +
  ggtitle("Cross-correlation of 4 different prices of GE ticker") +
  theme(legend.position = "none") +
  labs(fill = "Correlation") +
  xlab("") +
  ylab("")
print(p)
# Replace the lag index in Var1 with day labels. Assuming Var1 starts out
# numeric, the first assignment turns the column into character; the later
# comparisons still match because the number is then compared as a string.
m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 1] <- "Day0"
m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 2] <- "Day1"
m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 3] <- "Day2"
m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 4] <- "Day3"
m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 5] <- "Day4"
# Ordered factor whose levels follow the order of first appearance, so the
# days keep their sequence on the plot axis.
m.acf.geQuote$Var1 <- factor(
  m.acf.geQuote$Var1,
  levels = unique(m.acf.geQuote$Var1),
  ordered = TRUE
)
# Label the first two series in Var2.
# NOTE(review): indices 3 and 4 are not relabelled in this snippet - confirm
# whether that is intentional.
m.acf.geQuote$Var2[m.acf.geQuote$Var2 == 1] <- "Open"
m.acf.geQuote$Var2[m.acf.geQuote$Var2 == 2] <- "Close"
# Auto- and cross-correlation of columns 2-5 of geQuote, melted into long
# format, followed by relabelling of the lag index. `melt` must be provided by
# a reshaping package that is attached elsewhere.
m.geQuote <- as.matrix(geQuote[, 2:5])
# lag.max is spelled out instead of relying on partial matching of `lag`;
# na.contiguous keeps the longest stretch of rows without missing values.
acf.geQuote <- acf(
  m.geQuote,
  lag.max = 5,
  plot = FALSE,
  na.action = na.contiguous
)
m.acf.geQuote <- melt(acf.geQuote$acf)
# Replace the lag index in Var1 with day labels. Assuming Var1 starts out
# numeric, the first assignment turns the column into character; the later
# comparisons still match because the number is then compared as a string.
m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 1] <- "Day0"
m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 2] <- "Day1"
m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 3] <- "Day2"
m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 4] <- "Day3"
m.acf.geQuote$Var1[m.acf.geQuote$Var1 == 5] <- "Day4"
##AWK notes##
selective printing
awk '$2 ~ /regex/ { $1 = ""; print $0 }'
If $2 matches the regex, print the whole line with $1 blanked out.