Skip to content

Instantly share code, notes, and snippets.

@yanping
Last active April 26, 2016 10:44
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 6 You must be signed in to fork a gist
  • Save yanping/4619440 to your computer and use it in GitHub Desktop.
Save yanping/4619440 to your computer and use it in GitHub Desktop.
从和讯读取财务数据
# getHexunFinaceData.r
# Read financial statement data from Hexun (stockdata.stock.hexun.com).
# version: 0.93
#
#' Download quarterly financial statements from Hexun
#'
#' For every stock code, every year in `start:end` and each of four
#' reporting dates per year, one report page is downloaded and the item
#' names and values found on the `ControlEx1_lbl` line are extracted.
#'
#' @param stockid Vector of stock codes (numeric or character). Codes shorter
#'   than six characters are left-padded with zeros; codes longer than six
#'   characters (or `NA`) are skipped with a warning.
#' @param start First year to fetch.
#' @param end Last year to fetch; defaults to the current year.
#' @param type Statement type: `"b"` balance sheet (default), `"i"` income
#'   statement, `"c"` cash-flow statement.
#' @return A data frame with one row per report that contained data. The
#'   first column, named "股票代码", holds the stock code; the remaining
#'   columns are named after the items scraped from the page. Columns 3 to
#'   `ncol - 1` are converted to numeric and `"--"` cells become `NA`.
#'   Returns `NULL` when no report contained data.
#' @examples
#' # test <- getHexunFinaceData(600028, 2001, 2002, "b")
getHexunFinaceData <- function(stockid,
                               start = 1991,
                               end = as.numeric(substr(Sys.time(), 1, 4)),
                               type = "b") {
  if (!is.vector(stockid)) {
    stop("参数stockid必须是向量形式!")
  }
  if (start > end) {
    stop("起始或结束年份输入有误!")
  }
  if (length(type) != 1 || !type %in% c("b", "c", "i")) {
    stop("错误的财务报告类型!请检查参数type...")
  }
  type <- as.character(type)

  # Normalise the codes to zero-padded six-character strings.
  # as.character(600000) gives "6e+05", so numeric codes are formatted
  # explicitly as integers instead of going through as.character().
  if (is.numeric(stockid)) {
    codes <- formatC(stockid, format = "d", flag = "0", width = 6)
  } else {
    codes <- as.character(stockid)
    pad <- pmax(0L, 6L - nchar(codes))
    codes <- paste0(strrep("0", pad), codes)
  }
  codes[is.na(stockid)] <- NA_character_
  if (length(codes) == 0) {
    return(NULL)
  }

  page <- switch(type, b = "zcfz.aspx", i = "lr.aspx", c = "xjll.aspx")
  address <- paste0("http://stockdata.stock.hexun.com/2008/", page, "?stockid=")

  # The pages are converted from GB2312 to UTF-8 on Linux (as before) and,
  # additionally, in any session whose native encoding is UTF-8.
  to_utf8 <- identical(R.version$os, "linux-gnu") || isTRUE(l10n_info()[["UTF-8"]])

  # Markers surrounding item names and item values on the data line.
  name_open <- "<td class='dotborder' width='45%'><div class='tishi'><strong>"
  name_close <- "</strong></div></td><td>"
  value_open <- "<td><div class='tishi'>"
  value_close <- "</div></td><tr>"

  # Return every substring of `line` enclosed by the i-th `open` marker and
  # the i-th `close` marker; character(0) when either marker is absent.
  extract_between <- function(line, open, close) {
    from <- gregexpr(open, line, fixed = TRUE)[[1]]
    to <- gregexpr(close, line, fixed = TRUE)[[1]]
    if (from[1] == -1 || to[1] == -1) {
      return(character(0))
    }
    substring(line, from + nchar(open), to - 1)
  }

  # Download one report page and parse it. Returns NULL when the download
  # fails (with a warning) or when the page carries no data for that date.
  read_report <- function(url) {
    txt <- tryCatch(
      readLines(url),
      error = function(e) {
        warning("failed to read ", url, ": ", conditionMessage(e), call. = FALSE)
        NULL
      }
    )
    if (is.null(txt)) {
      return(NULL)
    }
    if (to_utf8) {
      txt <- iconv(txt, from = "gb2312", to = "UTF-8")
    }
    line <- grep("<span id=\"ControlEx1_lbl\">", txt, fixed = TRUE, value = TRUE)
    if (length(line) == 0) {
      return(NULL)
    }
    line <- line[1]
    # An empty <span id="ControlEx1_lbl"></span> contains no name markers,
    # so it is reported as "no data" by the check below.
    fields <- extract_between(line, name_open, name_close)
    if (length(fields) == 0) {
      return(NULL)
    }
    values <- extract_between(line, value_open, value_close)
    list(fields = fields, values = gsub(",", "", values, fixed = TRUE))
  }

  # Reporting-date suffixes appended to the year in the request URL.
  # NOTE(review): the first-quarter suffix is ".03.15" rather than ".03.31";
  # presumably this is what the site expects - confirm before changing.
  quarters <- c(".03.15", ".06.30", ".09.30", ".12.31")
  years <- start:end
  per_stock <- length(years) * length(quarters)
  total <- length(codes) * per_stock

  pb <- txtProgressBar(min = 0, max = total, style = 3)
  on.exit(close(pb), add = TRUE)
  done <- 0

  header <- NULL
  rows <- vector("list", total)
  n_rows <- 0L

  for (stock in codes) {
    if (is.na(stock) || nchar(stock) > 6) {
      warning(paste0("invalid stock code: ", stock))
      # Count the skipped requests so the bar still reaches 100%.
      done <- done + per_stock
      setTxtProgressBar(pb, done)
      next
    }
    for (year in years) {
      for (term in paste0(year, quarters)) {
        report <- read_report(paste0(address, stock, "&accountdate=", term))
        done <- done + 1
        setTxtProgressBar(pb, done)
        if (is.null(report)) {
          next
        }
        vname <- c("股票代码", report$fields)
        # All reports must share one layout, otherwise the rows cannot be
        # stacked into a single table.
        if (!is.null(header) && !identical(vname, header)) {
          msg <- paste0("看来股票", stock, "的数据格式在", year, "年发生了变化!")
          stop(msg)
        }
        header <- vname
        n_rows <- n_rows + 1L
        rows[[n_rows]] <- c(stock, report$values)
      }
    }
  }

  if (n_rows == 0L) {
    return(NULL)
  }

  value.mat <- do.call(rbind, rows[seq_len(n_rows)])
  value.df <- as.data.frame(value.mat, stringsAsFactors = FALSE)
  value.df[value.df == "--"] <- NA
  # Columns 1, 2 and the last one are kept as text, as in earlier versions.
  # NOTE(review): presumably column 2 is the report date and the last column
  # a remark - confirm against a live page.
  last_numeric <- ncol(value.df) - 1
  if (last_numeric >= 3) {
    numeric_cols <- 3:last_numeric
    value.df[numeric_cols] <- lapply(value.df[numeric_cols], as.numeric)
  }
  colnames(value.df) <- header
  value.df
}
@yanping
Copy link
Author

yanping commented Jan 25, 2013

# Install the stringr package only if it is not already available
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}

# Load the function first, e.g. with getHexunFinaceData.r saved in the
# Rcode directory on drive D
source("D:/Rcode/getHexunFinaceData.r")

# Function calls
test1 <- getHexunFinaceData(600028,2001,2002) # balance sheet of one stock
test2 <- getHexunFinaceData(c(600028,600022),2011,2012) # balance sheets of two stocks

test3 <- getHexunFinaceData(c(600028,600022),2011,2012, "i") # income statements of two stocks

# To write the downloaded data to a file
write.csv(test1,"D:/datafile.csv",row.names = FALSE)

@yanping
Copy link
Author

yanping commented Jan 27, 2013

2013年1月27日 增加了进度条

@yanping
Copy link
Author

yanping commented Jan 27, 2013

探测当前是哪种操作系统,还有几种方式:

if(.Platform$OS.type == "unix") {
   ...
}

或者

Sys.info()['sysname']

@laoyang945
Copy link

有时候和讯数据不够,对应的日期没有数据,希望对此做容错处理
是否用XML包里面的readHTMLTable更快?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment