Skip to content

Instantly share code, notes, and snippets.

@ttmmghmm
Last active March 27, 2016 12:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ttmmghmm/28e88fbf4c20a15cbfb3 to your computer and use it in GitHub Desktop.
Save ttmmghmm/28e88fbf4c20a15cbfb3 to your computer and use it in GitHub Desktop.
Quandl constituents.R, ETFs via Quandl, etfGetSymsParallel.R, iShares ETFs, TTR IKTrading .R
# Cheapest ETF ticker per commodity, grouped by sector. Source:
# http://commodityhq.com/2012/for-long-term-investors-the-cheapest-etf-for-every-commodity/
# Still to fill in: aluminium (base), agriculture, wind and nuclear
# (alternative). The element name "platinium" is kept exactly as published
# here so that lookups by name keep working.
# The outer parentheses print the list when run interactively.
(etfCommod <- list(
  precious = c(
    gold = "IAU",
    silver = "SIVR",
    platinium = "PPLT",
    palladium = "PALL"
  ),
  energy = c(
    wti = "USO",
    brent = "BNO",
    natGas = "UNG",
    gasoline = "UGA",
    heatingOil = "UHN"
  ),
  base = c(copper = "CPER"),
  alternative = c(solar = "TAN")
))
# Top Commodity ETFs By Assets http://commodityhq.com/trading-center/
# The table is built but not assigned, so it is only shown when run
# interactively.
# NOTE(review): the assetsBn values look like USD millions rather than
# billions despite the column name - confirm against the source page.
data.frame(
  Ticker = c(
    "GLD", "IAU", "SLV", "DBC", "DJP",
    "DBA", "SGOL", "GSG", "RJI", "PPLT"
  ),
  etf = c(
    "SPDR Gold Trust",
    "COMEX Gold Trust",
    "Silver Trust",
    "DB Commodity Index Tracking Fund",
    "Dow Jones-UBS Commodity Index TR ETN",
    "DB Agriculture Fund",
    "Physical Swiss Gold Shares",
    "GSCI Commodity-Indexed Trust Fund",
    "Rogers Intl Commodity ETN",
    "Physical Platinum Shares"
  ),
  assetsBn = c(
    32802L, 6864L, 6410L, 5694L, 1682L,
    1558L, 1110L, 1095L, 952L, 751L
  ),
  stringsAsFactors = FALSE
)
# https://github.com/hadley/data-baby-names
library(plyr)
# paths <- dir("data", pattern = "\\.csv$", full.names = TRUE)
# names(paths) <- basename(paths)
# ldply(paths, read.csv, stringsAsFactors = FALSE)
# devtools::install_github("Bart6114/sparklines")
library(sparklines)
library(dplyr)
# Convert a list of xts objects to a data.frame of closing prices.
# `dat` is only created by the getData() call further down this file, so the
# conversion is skipped when it does not exist yet instead of aborting the run.
if (exists("dat")) {
  clDat <- data.frame(lapply(dat, quantmod::Cl))
}

# Unfinished scratch lines from the original, kept for reference only. Left
# live they opened an unterminated string and a dangling pipe, which stopped
# the whole file from parsing:
#   do.call("
#   dat %>%

# Daily open-to-close move for GOOG since 2015-03-15.
# quantmod is not attached yet at this point in the file, hence the explicit
# namespace. env = NULL asks getSymbols() to return the series rather than
# assign it into an environment.
GOOG <-
  quantmod::getSymbols(
    "GOOG",
    src = "yahoo", from = "2015-03-15", env = NULL
  ) %>%
  as.data.frame() %>%
  mutate(day_result = GOOG.Close - GOOG.Open)
# https://stackoverflow.com/questions/5246843/how-to-get-a-complete-list-of-ticker-symbols-from-yahoo-finance
# See also below (txt <- paste("http://www.nasdaq.com/quotes/",
library(readr)
library(dplyr)
library(magrittr) # after dpylr?
#setInternet2(TRUE)
# Base URL of Quandl's static ticker/metadata CSV bucket. Every path in
# csvQuandl below is relative to this prefix.
csvQuandlUrl <- "http://s3.amazonaws.com/quandl-static-content/Ticker+CSV%27s/"

# Catalogue of CSV files, grouped by topic. Each group is a named character
# vector: the name is a human-readable label, the value the path to append to
# csvQuandlUrl. The "Indicies" spelling is part of the remote path and must
# not be corrected.
csvQuandl <- list(
  "Stock Index Constituents" = c(
    "S&P 500 Index" = "Indicies/SP500.csv",
    "Dow Jones Ind Avg" = "Indicies/dowjonesIA.csv",
    "NASDAQ Composite Index" = "Indicies/NASDAQComposite.csv",
    "NASDAQ 100 Index" = "Indicies/nasdaq100.csv",
    "NYSE Composite Index" = "Indicies/NYSEComposite.csv",
    "NYSE 100 Index" = "Indicies/nyse100.csv",
    "FTSE 100 Index" = "Indicies/FTSE100.csv"
  ),
  "Futures Metadata" = c(
    "futures metadata" = "Futures/meta.csv"
  ),
  "Commodities" = c(
    "commodities" = "commodities.csv"
  ),
  "Currencies and Countries" = c(
    "ISO Currency Codes" = "Currencies.csv",
    "ISO 3-letter Cntry Cds" = "ISOCodes.csv",
    "Currency Cross Rates" = "currencies.csv"
  ),
  "Cross-Country Stats" = c(
    "Country Overview" = "Indicators/overview.csv",
    "Demography" = "Indicators/demography.csv",
    "Economics" = "Indicators/economics.csv",
    "Education" = "Indicators/education.csv",
    # Was "Indicators/education.csv", a copy-paste of the line above that made
    # the Energy entry download the Education file.
    "Energy" = "Indicators/energy.csv",
    "Health" = "Indicators/health.csv",
    "Society" = "Indicators/society.csv"
  ),
  # NOTE(review): unlike every other entry this is an absolute URL, so it must
  # not be prefixed with csvQuandlUrl. The label says nasdaq100 but the URL is
  # the all-exchanges company screener - confirm which was intended.
  "misc" = c(
    "nasdaq100" = "http://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=all&render=download"
  )
)
library(magrittr) # not dplyr cos we need Tee %T>%
# https://www.quandl.com/data/CME/metadata
# Download the second "Stock Index Constituents" file (the Dow Jones list),
# keep it as `df`, then print a compact column overview.
# (stopifnot(length(.) == 1)) was a disabled sanity check in the original.
df <- read_csv(
  url( # url wont accept https?
    paste0(csvQuandlUrl, csvQuandl[["Stock Index Constituents"]][2])
  )
)
glimpse(df)
# https://github.com/smbache/magrittr
# Read one CSV from the Quandl static bucket.
#   value: path relative to csvQuandlUrl.
# Prints the full URL, pauses half a second, then returns the parsed file.
readUrlContents <- function(value) {
  target <- paste0(csvQuandlUrl, value)
  print(target)
  con <- url(target) # url wont accept https?
  Sys.sleep(.5) # wait 0.5 cos not logged into Quandl
  read_csv(con)
}
# Read every file of the 4th catalogue group into a list, one element per
# file. unlist() flattens the group into a single named character vector.
df <- lapply(unlist(csvQuandl[4]), readUrlContents) # %>%
# setNames(names(.)) %>% # doesnt actually do anything additional
# { . ; .} %>% # local assignment only so df is not global.
# lapply(glimpse) # cant be
str(df)
# iris$Sepal.Length %<>% sqrt # https://github.com/smbache/magrittr
# magrittr supplies the tee operator %T>% used below.
require(magrittr)
# displayDf: a functional sequence (note the leading `.`) that shows its input
# several ways. Each %T>% step runs its right-hand side for the printed output
# only and forwards the unchanged input to the next step, so the order of the
# steps is the order of the output: head, tail, str, then glimpse.
displayDf <- . %T>%
{ head(.) %>% print } %T>%
{ tail(.) %>% print } %T>%
str %>%
glimpse
# Try it on the built-in iris data set.
data(iris)
iris %>% displayDf
library(quantmod)
# The trailing [1] keeps only the first ticker; drop it to use the whole list.
tickers <- c("MSFT", "AAPL", "AMZN", "YHOO", "XOM", "CVX", "UNH", "NKE")[1]

# Functional sequence: download one symbol from 2015-01-01 and keep column 6.
# NOTE(review): column 6 is presumably the adjusted close - confirm for the
# data source in use.
xxx <- . %>%
  getSymbols(from = "2015-01-01", auto.assign = FALSE, warnings = FALSE) %>%
  .[, 6]

# The original called `XXx`; R is case-sensitive, so that object did not exist.
x <- tickers %>% { print(.) ; . } %>% lapply(xxx) # { setNames(, nm = .) }
str(x)
# A dangling `%>% cbind` followed here in the original. A pipe with no
# left-hand side is a syntax error, so it is kept only as this note; the
# column-binding it hinted at is done for `prices` below.

# One price column per ticker, from 2010-01-01.
prices <- do.call(cbind, lapply(tickers, function(x) {
  getSymbols(x, from = "2010-01-01", auto.assign = FALSE, warnings = FALSE)[, 6]
}))
colnames(prices) <- tickers

# arithmetic = FALSE makes diff() return price ratios; subtracting 1 turns
# them into simple period returns.
returns <- diff(prices, arithmetic = FALSE, na.pad = FALSE) - 1
means <- sapply(returns, mean)
# make 24 API calls to get the full list. increasing the "page" parameter
# Only page 1 is requested for now; widen seq_len() to fetch more pages.
df2 <- lapply(seq_len(1L), function(ii) {
  page_url <- paste0(
    "http://www.quandl.com/api/v2/datasets.csv?query=*&source_code=ICE&per_page=300&page=",
    ii
  )
  # url wont accept https?
  read_csv(url(page_url), col_names = FALSE)
})
# read_csv(url('http://www.quandl.com/api/v2/datasets.csv?query=*&source_code=ICE&per_page=300&page=1'))

# Stack the pages into one data frame. The original used
# do.call(rbind_all, df2), which spreads the pages over rbind_all()'s
# positional arguments instead of handing it the list, and rbind_all() is
# deprecated in favour of bind_rows().
df <- bind_rows(df2)
str(df)
# first page in different formats?
# The same first page of ICE metadata is offered as csv, json and xml; the
# trailing [1] selects the csv variant.
iceMeta <- c(
  'http://www.quandl.com/api/v2/datasets.csv?query=*&source_code=ICE&per_page=300&page=1',
  'http://www.quandl.com/api/v2/datasets.json?query=*&source_code=ICE&per_page=300&page=1',
  'http://www.quandl.com/api/v2/datasets.xml?query=*&source_code=ICE&per_page=300&page=1'
)[1]
# Read it without a header row and print a compact overview.
glimpse(
  read_csv(
    url(iceMeta), # url wont accept https?
    col_names = FALSE
  )
)
GOOG/SWX_IUKP UK property index - Quandl.
# install.packages("devtools")
require(devtools)
# The three packages below come from R-Forge, installed one at a time in this
# order.
invisible(lapply(
  c("FinancialInstrument", "blotter", "quantstrat"),
  install.packages,
  repos = "http://R-Forge.R-project.org"
))
install.packages("foreach")
# IKTrading is installed from GitHub via devtools.
install_github("IlyaKipnis/IKTrading")
quandClean, for getting Quandl futures data, is in the IKTrading package.
See examples and code for retrieving daily futures data at:
* https://quantstrattrader.wordpress.com/2014/07/22/intermission-a-data-file-for-futures-data-from-quandl/
With ETFs like GLD, extend the data further back than the ETF's inception date
(gold and bonds back to 1978!).
Examples and code here:
https://quantstrattrader.wordpress.com/2014/11/02/its-amazing-how-well-dumb-things-get-marketed/
To view, go to http://bl.ocks.org/ttmmghmm/28e88fbf4c20a15cbfb3
* See http://bl.ocks.org/ for formatting/display.
What to put <em>here</em>?
A graph of some kind, as a PNG file stored in Dropbox?
# Cheapest ETF ticker per commodity, grouped by sector. Source:
# http://commodityhq.com/2012/for-long-term-investors-the-cheapest-etf-for-every-commodity/
# Still to fill in: aluminium (base), agriculture, wind and nuclear
# (alternative). The element name "platinium" is kept exactly as published
# here so that lookups by name keep working.
# The outer parentheses print the list when run interactively.
(etfCommod <- list(
  precious = c(
    gold = "IAU",
    silver = "SIVR",
    platinium = "PPLT",
    palladium = "PALL"
  ),
  energy = c(
    wti = "USO",
    brent = "BNO",
    natGas = "UNG",
    gasoline = "UGA",
    heatingOil = "UHN"
  ),
  base = c(copper = "CPER"),
  alternative = c(solar = "TAN")
))
baseurl <- 'https://raw.githubusercontent.com/emiruz/isharesPortfolio/master'
# source(file.path(baseurl, "data.R"))
# getSavedData <- function(f="prod.rds") readRDS(f)

# Look up iShares (UK site) product ids for a set of tickers.
#
#   searchText: tickers joined with "+", sent as the search query.
#   seconds:    request timeout in seconds.
#
# Returns a named character vector of the "<digits>/<LETTERS>" fragments found
# after "en/products/" in the search-results page. When the number of ids
# equals the number of tickers the names are the tickers, in input order.
# Otherwise a warning is raised, both vectors are printed, and the names are
# taken from the letters part of each id.
# TODO: function to get a list of ETFs
getProductId <- function(searchText = "IDVY+IUKD+IAPD+SEDY+SLXX+IEBC+IGLT+SEML+SEMB+IBTS+IUKP", seconds = 5) {
  getString <- paste0(
    "http://www.ishares.com/uk/individual/en/search/product-search-results?siteEntryPassthrough=true&searchText=",
    searchText,
    "&searchType=productSearch&start=1&maxResults=50&pageSize=50"
  )
  # httr is never attached in this file, so its functions are called through
  # the namespace.
  response <- httr::GET(getString, httr::timeout(seconds))
  page <- httr::content(response, as = "text")

  tickers <- unlist(strsplit(searchText, split = "+", fixed = TRUE))
  ids <- unlist(regmatches(
    page,
    gregexpr("(?<=en/products/)\\d+/[A-Z]+", page, perl = TRUE)
  ))

  if (length(tickers) != length(ids)) {
    warning("length of inputs and outputs are not matching")
    print(tickers)
    print(ids)
    # Naming by position would fail (more tickers than ids) or silently
    # mislabel (fewer), so fall back to the letters embedded in each id.
    # NOTE(review): assumes those letters are the ticker - confirm.
    return(structure(ids, names = sub("^[0-9]+/", "", ids)))
  }
  structure(ids, names = tickers)
}
ids <- getProductId()
# http://www.ishares.com/uk/individual/en/products/product-list#categoryId=111&lvl2=overview
# Scrape one iShares (UK site) product page plus its holdings and
# distributions feeds.
#
#   id:      product id fragment appended to ".../en/products/".
#   seconds: timeout in seconds, applied to every HTTP request made here.
#
# Returns a list with three data frames:
#   keyfacts - one row: id, class, nav, currency, expense, freq, method,
#              structure
#   holdings - id, isin, name, holding (percent / 100), sector, country
#   dist     - id, date, dist
#
# Fixes over the original: the timeout was handed to content() instead of
# GET() for the main page, the two feed requests ignored `seconds` (fixed at
# 5), and the httr functions were used without httr being attached.
getProduct <- function(id, seconds = 5) {
  prefix <- "http://www.ishares.com"

  # GET a URL with the caller's timeout and return the body as text.
  fetchText <- function(u) {
    httr::content(httr::GET(u, httr::timeout(seconds)), as = "text")
  }

  # Build the JSON feed URL for one tab from the link found in the page.
  ajaxUrl <- function(page, tab) {
    pattern <- paste0("/uk/individual/en/products/.+?(?=\\.ajax\\?tab=", tab, ")")
    paste0(
      prefix,
      regmatches(page, regexpr(pattern, page, perl = TRUE)),
      ".ajax?tab=", tab, "&fileType=json"
    )
  }

  # Download a feed, strip newlines and literal \t / \n escapes, and parse the
  # outermost [...] as JSON.
  fetchJsonArray <- function(u) {
    txt <- gsub("\n|\\\\t|\\\\n", "", fetchText(u))
    jsonlite::fromJSON(regmatches(txt, regexpr("\\[.+\\]", txt)))
  }

  page <- fetchText(paste0(prefix, "/uk/individual/en/products/", id, "?siteEntryPassthrough=true"))
  holdingsUrl <- ajaxUrl(page, "all")
  distributionsUrl <- ajaxUrl(page, "distributions")

  d <- gsub("\n", "", iconv(page, to = "UTF-8"))
  # NAV: text following nav-value"> up to the next tag, digits and dots only.
  nav <- gsub("[^0-9\\.]", "", regmatches(d, regexpr("(?<=nav-value\">).+?(?=<)", d, perl = TRUE)))

  dist <- fetchJsonArray(distributionsUrl)
  hold <- fetchJsonArray(holdingsUrl)

  # NOTE(review): "%b" month parsing depends on the session locale - confirm
  # the feed's month abbreviations match it.
  dist <- data.frame(
    id = id,
    date = as.Date(dist$colPayableDate$display, "%d/%b/%Y"),
    dist = as.numeric(dist$colTotalDistribution$raw),
    stringsAsFactors = FALSE
  )
  hold <- data.frame(
    id = id,
    isin = hold$colIsin,
    name = hold$colIssueName,
    holding = as.numeric(hold$colHoldingPercent$raw) / 100,
    sector = hold$colSectorName,
    country = hold$colCountryOfRisk,
    stringsAsFactors = FALSE
  )

  # Key facts are read from the third-from-last HTML table on the page.
  d <- XML::readHTMLTable(d, header = FALSE)
  d <- d[[NROW(d) - 2]]
  # First value in column V2 whose label in column V1 matches pattern n.
  val <- function(n) as.character(head(d$V2[grepl(n, d$V1)], 1))
  d <- data.frame(
    id = id,
    class = val("Asset Class"),
    nav = as.numeric(nav),
    currency = val("Base Currency"),
    expense = val("Total Expense Ratio"),
    freq = val("Distribution Frequency"),
    method = val("Methodology"),
    structure = val("Product Structure"),
    stringsAsFactors = FALSE
  )
  d$expense <- as.numeric(gsub("%", "", d$expense)) / 100

  list(keyfacts = d, holdings = hold, dist = dist)
}
# Build the country -> zone lookup.
# Returns a character vector of zone codes ("W0", "W1", "W2") whose names are
# the lower-cased country names, in the order listed below.
getZones <- function() {
  countriesByZone <- list(
    W0 = c("United States", "Canada"),
    W1 = c("United Kingdom", "Guernsey", "Australia", "Ireland"),
    W2 = c(
      "Euroland", "France", "Germany", "Belgium", "Austria", "Sweden",
      "Denmark", "Netherlands", "Norway", "Luxembourg"
    )
  )
  countries <- unlist(countriesByZone, use.names = FALSE)
  # A country may belong to one zone only.
  stopifnot(anyDuplicated(countries) == 0L)
  zoneCodes <- rep(names(countriesByZone), times = lengths(countriesByZone))
  names(zoneCodes) <- tolower(countries)
  zoneCodes
}
zones <- getZones()
require(XML)
require(jsonlite) # getProduct
require(tcltk)
# Download every product in `ids` and enrich the results.
#
#   ids: product ids; duplicates are dropped and the rest sorted before
#        downloading.
#
# Returns a list named by id. Each element is the getProduct() result with two
# added columns: holdings$zone (looked up in the global `zones` by lower-cased
# country, "W3" when the country is not listed) and dist$month (numeric month
# of the distribution date).
# A Tk progress bar tracks the downloads.
getProducts <- function(ids) {
  ids <- sort(unique(ids))
  total <- NROW(ids)
  pb <- tkProgressBar(min = 0, max = total, initial = 0, title = "Data collection in progress ...")
  # Close the bar on every exit path; the original left the window open when a
  # download failed.
  on.exit(close(pb), add = TRUE)

  # seq_along() replaces seq(along = ids), which relied on partial matching of
  # the `along.with` argument.
  x <- lapply(seq_along(ids), function(ii) {
    id <- ids[ii]
    setTkProgressBar(pb, ii, label = paste0(id, " (", ii, " of ", total, ")"))
    getProduct(id)
  })
  names(x) <- ids

  lapply(x, function(ii) {
    ii$holdings$zone <- zones[match(tolower(ii$holdings$country), names(zones))]
    ii$holdings$zone[is.na(ii$holdings$zone)] <- "W3"
    ii$dist$month <- as.numeric(strftime(ii$dist$date, "%m"))
    ii
  })
}
x <- getProducts(ids)
http://www.quintuitive.com/2014/12/13/parallelism-via-parsapply/
* mclapply to kick off parallel R processes and to demonstrate inter-process synchronization via the flock package
* branch between single core and single machine execution
* RSQLite
###
# https://aschinchon.wordpress.com/2015/05/08/odd-connections-inside-the-nasdaq-100/
# See also below http://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=all&render=download'
# The trailing [3] selects the nasdaq-100 list.
index <- c('nasdaq-financial-100', 'djia', 'nasdaq-100')[3]
(txt <- paste0("http://www.nasdaq.com/quotes/", index, "-stocks.aspx?render=download"))
str(tickers <- read.csv(url(txt), header = TRUE))
library(parallel)
library(quantmod)
# read.csv() returns the Symbol column as a factor on R < 4.0; getSymbols()
# needs plain strings.
tmp <- as.character(tickers$Symbol[1:4])
# Fetch the first four symbols in parallel and time it. The original call had
# a stray empty argument (`auto.assign=FALSE, , verbose=TRUE`), removed here.
system.time(syms <- mclapply(tmp, function(xx) {
  suppressWarnings(getSymbols(xx, auto.assign = FALSE, verbose = TRUE))
}))
names(syms) <- tmp
str(syms)
###
tickers<-c("GLD","DBC", "EEM", "EFV","EFG","BND","TLT","SHY","IWF","IWD","IWC","IWO","IWN","VNQ")
# Use *parallel::mclapply* to call *quantmod::getSymbols*.
library(parallel)
library(quantmod)
# Download several symbols with parallel::mclapply.
#   symbols: character vector of tickers.
#   ...:     forwarded unchanged to getSymbols().
# Returns a list with one element per symbol, named by `symbols`.
getData <- function(symbols, ...) {
  fetchOne <- function(sym) getSymbols(sym, auto.assign = FALSE, ...)
  series <- mclapply(symbols, fetchOne)
  names(series) <- symbols
  series
}
# Fetch everything except the last ticker, then inspect the result.
dat <- getData(tickers[-length(tickers)]) # all tickers?
# April 2015 rows of the GLD series.
dat[["GLD"]]["2015-04"]
# Top-level structure of the first two series only.
str(dat[seq_len(min(2L, length(dat)))], max.level = 1)
TODO:
* Auto-generate ETF tickers with descriptions and attributes such as weights.
* To view, go to http://bl.ocks.org/ttmmghmm/28e88fbf4c20a15cbfb3
* See http://bl.ocks.org/ for formatting/display.
# https://stackoverflow.com/questions/5246843/how-to-get-a-complete-list-of-ticker-symbols-from-yahoo-finance
# See also below (txt <- paste("http://www.nasdaq.com/quotes/",
library(readr)
library(dplyr)
# magrittr has to be attached before the first %T>% below: dplyr re-exports
# only %>%. The original loaded it (with require) after readUrls had already
# used the tee operator.
library(magrittr)
# setInternet2(TRUE)
urls <-
  c(
    'https://s3.amazonaws.com/quandl-static-content/Ticker+CSV%27s/Indicies/SP500.csv',
    'http://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=all&render=download'
  )

# readUrls: functional sequence that opens a URL, parses it as CSV, prints the
# result and a compact column overview, and passes the parsed data on.
readUrls <-
  . %>%
  url %>%
  read_csv %T>%
  print %T>% # read_csv returns Classes 'tbl_df', 'tbl' and 'data.frame'
  glimpse
df <- urls[1] %>% readUrls

# displayDf: show head, tail, str and glimpse of a data frame, in that order,
# then hand the input to invisible().
displayDf <- . %T>% # move to utilities package?
  { head(.) %>% print } %T>%
  { tail(.) %>% print } %T>%
  str %T>%
  glimpse %>%
  invisible
data(iris)
iris %>% displayDf
# Note: this downloads urls[1] a second time and overwrites df.
df <- urls[1] %>% readUrls %>% displayDf
#####
startDate <- "2005-01-01"
# TODO: get this info from wikimedia
urls <- c(sAndP = "http://en.wikipedia.org/wiki/List_of_S%26P_500_companies")
# https://en.wikipedia.org/wiki/NASDAQ-100#Components
# XML, tseries and zoo are called through their namespaces because this file
# only attaches XML and tseries further down, after this code has run.
tables <- XML::readHTMLTable(urls)
tickers <- as.matrix(tables[[1]]["Ticker symbol"]) # a matrix?

# Monthly simple returns of one instrument over a `lag`-month horizon.
#
#   instrument: ticker passed to get.hist.quote().
#   startDate:  first date to download.
#   lag:        number of months between the two prices compared.
#
# Returns a zoo series: the last adjusted price of each month, log-differenced
# over `lag` months and converted back to a simple return.
#
# The original signature was (instrument=instrument, startDate=startDate,
# lag=lag). A default that refers to itself fails with "promise already under
# evaluation" as soon as the argument is omitted, so those defaults are gone.
# only get the data, use try, return a list or a matrix?
# does get.hist.quote offer monthly parameter? what about getSymbols from quantmod?
instrumentRtn <- function(instrument, startDate, lag = 1L) {
  price <- tseries::get.hist.quote(
    instrument,
    quote = "Adj", start = startDate, retclass = "zoo"
  )
  monthlyPrice <- aggregate(price, zoo::as.yearmon, tail, 1)
  exp(diff(log(monthlyPrice), lag = lag)) - 1
}

# 12-month and 1-month returns for at most the first 50 tickers.
# seq_len(min(...)) replaces 1:length(tickers[1:50]), which always ran 50
# times and fed NA tickers to the downloader when fewer were available.
dataFactor <- vector("list", min(50L, length(tickers)))
dataRtn <- vector("list", min(50L, length(tickers)))
for (i in seq_len(min(50L, length(tickers)))) {
  print(tickers[i])
  dataFactor[[i]] <- instrumentRtn(tickers[i], startDate, lag = 12)
  dataRtn[[i]] <- instrumentRtn(tickers[i], startDate, lag = 1)
}
# list of functions in TTR that _might_ be used as indicators
# Assume if first argument is named 'x' then that function is a univariate indicator.
library(TTR)

# Name of the first formal argument of every function exported by TTR, or NA
# for a function without formals. vapply() guarantees a character vector named
# by function, where the original sapply() chain could return a list.
firstArg <- vapply(
  as.character(lsf.str("package:TTR")),
  function(fn) {
    argNames <- names(formals(get(fn, "package:TTR")))
    if (length(argNames) > 0L) argNames[[1L]] else NA_character_
  },
  character(1)
)
(indicFuns <- names(firstArg)[which(firstArg == "x")])

(x <- rnorm(1e1L))
# Apply the first two candidate indicators to x. The original wrote
# lapply(get, x), which hands x to get() as its `pos` argument instead of
# calling the looked-up function on it.
# NOTE(review): x has only 10 observations - confirm the default window
# lengths of the selected functions fit that sample.
lapply(head(indicFuns, 2L), function(fn) get(fn, "package:TTR")(x))
#install_github(repo="IlyaKipnis/IKTrading")
# library() stops with an error when the package is missing; require() would
# only return FALSE and let the next line fail less clearly.
library(IKTrading)
# Continuous gold futures series from Quandl.
# The original passed verbose=verbose, but no `verbose` object is defined in
# this file, so the call could not be evaluated.
# NOTE(review): FALSE is assumed to be the intended setting - confirm.
CME_GC <- quandClean("CHRIS/CME_GC", start_date = "1990-01-01", end_date = "2015-02-14", verbose = FALSE) #Gold
# http://www.thertrader.com/2015/03/23/factor-evaluation-in-quantitative-portfolio-management/
library(tseries)
library(quantmod)
library(XML)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment