This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(rvest) | |
from <- 248 | |
to <- 366 | |
pages <- paste0("http://www.shanhaimiwenlu.com/", c(245, seq(from, to)), ".html") | |
system.time( | |
thelist <- lapply(pages, function(p) { | |
writeLines(substr(p, 31, 33), "break.txt") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(ggplot2) | |
require(reshape2) | |
# Populate some sample data: monthly visitor counts and a visits-per-visitor
# ratio for January through June.
# month.abb is base R's constant vector of English month abbreviations; taking
# the first six once avoids repeating the literal vector for values and levels.
month_levels <- month.abb[1:6]
d <- data.frame(
  # Fix the factor levels in calendar order so they do not sort alphabetically.
  month = factor(month_levels, levels = month_levels),
  visitors = c(156898, 187456, 238456, 256789, 228764, 185632),
  ratio = c(1.45, 1.32, 1.29, 1.22, 1.14, 1.01)
)
# Visits = visitors * ratio, rounded to whole numbers.
d$visits <- round(d$visitors * d$ratio)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################################### | |
# This program is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Approach: draw the base map centred on the midpoint of the two points >>
# get the point coordinates >> plot the points on the base map >> draw the
# connecting line
library(ggmap)
# Get the longitude/latitude coordinates of Fudan University
fd_geo <- geocode("Fudan University")
# NOTE(review): the recorded output below shows address=Shanghai, which does
# not match the "Fudan University" query above - confirm which call produced it.
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Shanghai&sensor=false
## Google Maps API Terms of Service : http://developers.google.com/maps/terms
## lon lat
## 1 121.4737 31.23039
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Adult Standard 成人标准舞 | |
# Adult Latin 成人拉丁舞 | |
# Adult Ten Dance 成人十项舞 | |
# Youth Standard 青年标准舞 | |
# Youth Latin 青年拉丁舞 | |
# Senior I Standard 中青一组标准舞 | |
# Senior I Latin 中青一组拉丁舞 | |
# Senior II Standard 中青二组标准舞 | |
# Senior II Latin 中青二组拉丁舞 | |
# Senior III Standard 中青三组标准舞 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Private Sub splitworkbook() | |
Dim sht As Worksheet | |
Dim MyBook As Workbook | |
Set MyBook = ActiveWorkbook | |
For Each sht In MyBook.Sheets | |
sht.Copy | |
ActiveWorkbook.SaveAs Filename:=MyBook.Path & "\" & Left(ThisWorkbook.Name, 7) & "_" & LCase(Trim(sht.Name)), FileFormat:=xlCSV | |
'Filename:一句将工作簿名"w201106"加上小写的工作表名作为拆分后的csv文件名 | |
'xlCSV: 将工作簿另存为CSV默认格式 | |
ActiveWorkbook.Close savechanges:=True 'savechanges:=True 避免了每次弹出确认保存的对话框 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generate the .htm file names of the male athlete pages to read from.
male_adult <- paste0("male_adult_", 1:3, ".htm")
male_youth <- paste0("male_youth_", 1:2, ".htm")
male_senior <- paste0("male_senior_", 1:3, ".htm")
male_junior <- paste0("male_junior_", 1:2, ".htm")
male <- c(male_adult, male_youth, male_senior, male_junior, "male_juvenile.htm")
male <- matrix(male, nrow = length(male))
# Parse the HTML table(s) of every page. sapply() is kept on purpose: the
# inspection below indexes the result as male[[1]], which relies on sapply()'s
# simplification (presumably one table per page - verify against the files).
male <- sapply(
  male,
  function(x) readHTMLTable(x, encoding = "utf-8", stringsAsFactors = FALSE)
)
# Quick look at the first parsed table.
head(male[[1]])
# Name Surname Country Category Status Member # |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(XML) | |
# File names of the saved couple-list pages.
url <- paste0("couple_list_", 1:6, ".htm")
url <- matrix(url, nrow = length(url))
# Parse the HTML table(s) of every page. sapply() is kept on purpose: the code
# below indexes each element of the result as a data frame, which relies on
# sapply()'s simplification (presumably one table per page - verify).
couple <- sapply(
  url,
  function(x) readHTMLTable(x, stringsAsFactors = FALSE, encoding = "utf-8")
)
# Quick look at the first page, then at the columns that are kept.
head(couple[[1]])
head(couple[[1]][, 2:6])
# Keep columns 2 to 6 of every page and stack all pages in a single rbind()
# call instead of growing the data frame inside a loop. unname() stops the
# list names from being prefixed onto the row names of the result.
couplelist <- do.call(
  rbind,
  unname(lapply(couple, function(page) page[, 2:6]))
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Let's do the data cleansing in R.
# Read the text file back into R as a character vector; with sep = "" scan()
# splits the input on whitespace.
d <- scan("weather.txt", what = "", sep = "")
# Replace every token that ends in a digit with a newline marker.
d[grep("[0123456789]$", d)] <- "\n"
i <- length(d)
wt1 <- c()
# Append the tokens one at a time, comma-separated, into a single string.
# seq_along() runs zero iterations when d is empty, whereas 1:i would count
# 1, 0 and index past the end of d.
for (j in seq_along(d)) {
  wt1 <- paste(wt1, d[j], sep = ",")
}
write.table(wt1, "wt1.txt") # please go check wt1.txt
# import back wt1.txt as data frame |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# R for web scraping - weather project | |
# Get web raw data via readHTMLTable() from the XML package using R
library(XML) | |
# Generate the YYYYMM month keys for the url set (2011-01 through 2013-03).
# The original note says the set is put in matrix format and applied with
# sapply() later on; that step is not part of this section.
# sprintf("%02d") zero-pads the month directly, so months 10-12 need no
# separate patch-up afterwards.
y1 <- sprintf("2011%02d", 1:12)
y2 <- sprintf("2012%02d", 1:12)
y3 <- sprintf("2013%02d", 1:3)
y <- c(y1, y2, y3)
NewerOlder