-
-
Save rrodrigueznt/c4937a87e7dbaca4ada0 to your computer and use it in GitHub Desktop.
## Build the 2012 subset of IDIS publications signed by CIMUS authors and
## export it as an XLSX workbook.
##
## Steps: read the 'publications' sheet of the public IDIS.publications.xlsx
## workbook by URL, keep the rows whose yearPublication is 2012 and whose
## authors field matches the CIMUS author pattern, then write the result out.

## Load any library able to read Excel files
# library(XLConnect)
## Use gdata to get remote data by URL
library(gdata)
## Downloading revisions requires authenticated access, which is not available
## from R yet. Only freely accessible files can be sourced.
# IDIS.publications <- "http://www.idisantiago.es/bin/downloadrev/IDIS/DataRepository/IDIS.publications.xlsx?rev=1.2"
IDIS.publications <- "http://www.idisantiago.es/bin/download/IDIS/DataRepository/IDIS.publications.xlsx"
IDISpublications <- read.xls(IDIS.publications, sheet = "publications")
## Alternative: read the file contents from a local copy by naming file and sheet
# IDIS.publications <- file.path("~/Dropbox/IDIS-devel/IDIS.publications.xlsx")
# IDISpublications <- readWorksheetFromFile(IDIS.publications, sheet="publications")
## Create the regular expression including all possible spellings of the authors
## affiliated with CIMUS (surname plus initials, with or without accents).
## NOTE(review): the accent groups such as (á|a)? are optional, so a spelling
## with the vowel missing also matches, and the "." in "F.?" is an unescaped
## wildcard. Confirm this looseness is intended before tightening the pattern,
## because doing so can change the set of matched rows.
patternall <- "Carracedo (Á|A)?|Canedo A|Di(é|e)?guez C|Cadarso C|Cadarso(-| )?Su(á|a)?rez C|Pombo CM?|(Á|A)?lvarez CV?|Ara(ú|u)?jo(-| )?Vilar D|(Á|A)?lvarez,? E|(Á|A)?lvarez(-| )?Castro E|Dom(í|i)?nguez F|F.? Dom(í|i)?nguez|Gonz(á|a)?lez F|Mart(í|i)?n F|Guti(é|e)?rrez H|Guti(é|e)?rrez(-| )?de(-| )?Ter(á|a)?n H|Mart(í|i)?nez I|Mart(í|i)?nez(-| )?Silva I|Rodr(í|i)?guez(-| )?Pallares J|Requena,? JR?|Costoya JA?|Lado J|Labandeira(-| )?Garc(í|i)?a JL?|Juanatey JR?G?|Gonz(á|a)?lez(-| )?Juanatey JR?|Ram(ó|o)?n Gonz(á|a)?lez(-| )?Juanatey J|Zalvide J|Maside X|Lima L|Garc(í|i)?a M|Guerra MJ?|Senar(í|i)?s R|L(ó|o)?pez M|P(é|e)?rez(-| )?Fern(á|a)?ndez R|Nogueiras|Seoane S|Bravo SB?"
## Subset by year and by the authors pattern. Add more yearPublication values
## to the c(...) vector to enlarge the data set.
## Pass a vector of column names as the select argument of subset() to keep
## only a set of columns.
idiscimus2012 <- subset(
  IDISpublications,
  (IDISpublications$yearPublication %in% c("2012")) &
    grepl(patternall, IDISpublications$authors)
)
## Write a CSV compliant table. UTF8 is used by default if fileEncoding is not
## specified. Two examples are shown here.
## Check ?write.table in the R console for more information.
# write.csv(idiscimus2012, file = "~/Dropbox/IDIS-devel/idis.cimus.2012.utf8.csv", row.names = FALSE)
# write.csv(idiscimus2012, file = "~/Dropbox/IDIS-devel/idis.cimus.2012.lat1.csv", row.names = FALSE, fileEncoding = "latin1")
## Write an XLSX file: maximum control over the process with library xlsx!
library(xlsx)
write.xlsx2(
  idiscimus2012,
  file = "/Users/rrodriguez/Dropbox/IDIS-devel/idis.cimus.2012.xlsx",
  sheetName = "idis.cimus.2012",
  row.names = FALSE,
  showNA = FALSE
)
Check this about R encoding!
https://stat.ethz.ch/pipermail/r-sig-mac/2007-March/003733.html
http://cran.r-project.org/doc/Rnews/Rnews_2005-1.pdf
And check, at least, this about reading data in R!
http://cran.r-project.org/doc/manuals/R-data.html
https://science.nature.nps.gov/im/datamgmt/statistics/r/fundamentals/index.cfm
A must-have for full control over the production of Excel files!
https://code.google.com/p/rexcel/
Hi! Great links!
Here's something about regular expressions:
Very handy: http://biostat.mc.vanderbilt.edu/wiki/pub/Main/SvetlanaEdenRFiles/regExprTalk.pdf
http://www.regular-expressions.info/reference.html
My commands
# Local variant of the CIMUS 2012 extraction: read the workbook from disk with
# XLConnect, filter it, and export a semicolon-separated text file.
library(XLConnect)

# Path of the local copy of the workbook (placeholder user folder as posted).
IDIS.publications <- file.path("C:/Users/...../Desktop/IDIS-devel/IDIS.publications.xlsx")
# Read the first sheet of the workbook into a data frame.
IDISpublications <- readWorksheetFromFile(IDIS.publications, sheet = 1)

# Regular expression listing the accepted spellings of the CIMUS authors.
patternall <- "Carracedo (Á|A)?|Canedo A|Di(é|e)?guez C|Cadarso C|Cadarso(-| )?Su(á|a)?rez C|Pombo CM?|(Á|A)?lvarez CV?|Ara(ú|u)?jo(-| )?Vilar D|(Á|A)?lvarez,? E|(Á|A)?lvarez(-| )?Castro E|Dom(í|i)?nguez F|F.? Dom(í|i)?nguez|Gonz(á|a)?lez F|Mart(í|i)?n F|Guti(é|e)?rrez H|Guti(é|e)?rrez(-| )?de(-| )?Ter(á|a)?n H|Mart(í|i)?nez I|Mart(í|i)?nez(-| )?Silva I|Rodr(í|i)?guez(-| )?Pallares J|Requena,? JR?|Costoya JA?|Lado J|Labandeira(-| )?Garc(í|i)?a JL?|Juanatey JR?G?|Gonz(á|a)?lez(-| )?Juanatey JR?|Ram(ó|o)?n Gonz(á|a)?lez(-| )?Juanatey J|Zalvide J|Maside X|Lima L|Garc(í|i)?a M|Guerra MJ?|Senar(í|i)?s R|L(ó|o)?pez M|P(é|e)?rez(-| )?Fern(á|a)?ndez R|Nogueiras|Seoane S|Bravo SB?"

# Keep the rows published in 2012 whose authors field matches the pattern.
# Neither %in% nor grepl() yields NA, so plain logical indexing selects the
# same rows that subset() would.
idiscimus2012 <- IDISpublications[
  IDISpublications$yearPublication %in% c("2012") &
    grepl(patternall, IDISpublications$authors),
  ,
  drop = FALSE
]

# Export the selection using ";" as the field separator.
write.table(
  idiscimus2012,
  file = "C:/Users/......./Desktop/IDIS-devel/idis.cimus.2012.csv",
  sep = ";"
)

# Number of publications selected.
nrow(idiscimus2012)
- Gives 178 rows.
- My problem is how to source the scripts and the Excel file from Dropbox.
To "source" the script from your local R installation...