Skip to content

Instantly share code, notes, and snippets.

@tjake

tjake/gist:2661454

Created May 11, 2012
Embed
What would you like to do?
#Load RJDBC
library(RJDBC)
# Build the Hive JDBC driver object. The driver classpath is every file whose
# name ends in "jar" inside the local Hadoop directory and the Hive lib
# directory; these are machine-specific paths and must exist on the host.
hivedrv <- JDBC(
  "org.apache.hadoop.hive.jdbc.HiveDriver",
  c(
    list.files("/Users/jake/workspace/bdp/resources/hadoop",
               pattern = "jar$", full.names = TRUE),
    list.files("/Users/jake/workspace/bdp/resources/hive/lib",
               pattern = "jar$", full.names = TRUE)
  )
)

# Open a connection to the Hive service listening on localhost:10000, using
# the "default" database named in the JDBC URL.
hivecon <- dbConnect(hivedrv, "jdbc:hive://localhost:10000/default")
# Create a Hive external table (row_key, column_name, value) backed by the
# Cassandra storage handler, with the Cassandra keyspace set to
# 'PortfolioDemo'. The statement text is sent to Hive exactly as written.
tmp <- dbSendQuery(hivecon,"create external table StockHist(row_key string, column_name string, value double)
STORED BY 'org.apache.hadoop.hive.cassandra.CassandraStorageHandler'
WITH SERDEPROPERTIES ('cassandra.ks.name' = 'PortfolioDemo')")
# The DDL result is never read, so release the underlying JDBC result set and
# statement right away instead of leaving them open for the whole session.
invisible(dbClearResult(tmp))
# Query Hive for the per-ticker return: StockHist is joined to itself on
# row_key, pairing each row `a` with the row `b` whose column_name equals
# date_add(a.column_name, 10), and AVG(b.value - a.value) is taken per row_key.
# Rows come back ordered by that average (ascending).
# NOTE(review): presumably column_name holds a date string so that date_add
# yields a 10-day offset - confirm against the Cassandra data.
returns_sql <- "select a.row_key ticker, AVG((b.value - a.value)) ret
from StockHist a JOIN StockHist b on
(a.row_key = b.row_key AND date_add(a.column_name,10) = b.column_name)
group by a.row_key order by ret"
hres <- dbGetQuery(hivecon, returns_sql)

# This is the last database access in the script; close the connection so the
# Hive session is not left open after the data has been fetched.
invisible(dbDisconnect(hivecon))
# Draw one bar per result row: bar heights come from the second column of
# `hres`, bar labels from the first column. Each bar gets its own colour from
# the topo.colors palette, and bar borders are suppressed.
barplot(
  height = hres[[2]],
  names.arg = hres[[1]],
  col = topo.colors(nrow(hres)),
  border = NA
)
title(main = "Avg 10 Day Return for all Stocks")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment