Last active
May 26, 2017 12:17
-
-
Save rrodrigueznt/c4937a87e7dbaca4ada0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Filter the IDIS publications workbook down to the 2012 records whose author
## list mentions a CIMUS researcher, then export the result as an XLSX file.

## Load any library able to read Excel files
# library(XLConnect)
## Use gdata to get remote data by URL
library(gdata)
## Downloading revisions requires authenticated access, which is not possible
## this way yet. Only freely accessible files can be sourced.
# IDIS.publications <- "http://www.idisantiago.es/bin/downloadrev/IDIS/DataRepository/IDIS.publications.xlsx?rev=1.2"
IDIS.publications <- "http://www.idisantiago.es/bin/download/IDIS/DataRepository/IDIS.publications.xlsx"
IDISpublications <- read.xls(IDIS.publications, sheet = "publications")
## Read file contents by naming file and sheet
# IDIS.publications <- file.path("~/Dropbox/IDIS-devel/IDIS.publications.xlsx")
# IDISpublications <- readWorksheetFromFile(IDIS.publications, sheet="publications")
## Create the regular expression including all possible appearances of CIMUS
## affiliated authors. One alternative per line, joined with "|" below.
## Accented vowels are written as optional groups such as "(á|a)?", and the
## parts of compound surnames may be separated by a hyphen, a space or nothing.
patternall <- paste(
  c(
    "Carracedo (Á|A)?",
    "Canedo A",
    "Di(é|e)?guez C",
    "Cadarso C",
    "Cadarso(-| )?Su(á|a)?rez C",
    "Pombo CM?",
    "(Á|A)?lvarez CV?",
    "Ara(ú|u)?jo(-| )?Vilar D",
    "(Á|A)?lvarez,? E",
    "(Á|A)?lvarez(-| )?Castro E",
    "Dom(í|i)?nguez F",
    # "[.]" matches a literal period; a bare "." would match any character.
    "F[.]? Dom(í|i)?nguez",
    "Gonz(á|a)?lez F",
    "Mart(í|i)?n F",
    "Guti(é|e)?rrez H",
    "Guti(é|e)?rrez(-| )?de(-| )?Ter(á|a)?n H",
    "Mart(í|i)?nez I",
    "Mart(í|i)?nez(-| )?Silva I",
    "Rodr(í|i)?guez(-| )?Pallares J",
    "Requena,? JR?",
    "Costoya JA?",
    "Lado J",
    "Labandeira(-| )?Garc(í|i)?a JL?",
    "Juanatey JR?G?",
    "Gonz(á|a)?lez(-| )?Juanatey JR?",
    "Ram(ó|o)?n Gonz(á|a)?lez(-| )?Juanatey J",
    "Zalvide J",
    "Maside X",
    "Lima L",
    "Garc(í|i)?a M",
    "Guerra MJ?",
    "Senar(í|i)?s R",
    "L(ó|o)?pez M",
    "P(é|e)?rez(-| )?Fern(á|a)?ndez R",
    "Nogueiras",
    "Seoane S",
    "Bravo SB?"
  ),
  collapse = "|"
)
## Subset by year and author's pattern. Include more yearPublication values to
## enlarge the data set.
## Pass a further vector after the grepl condition to get a set of columns by
## column name.
idiscimus2012 <- subset(
  IDISpublications,
  IDISpublications$yearPublication %in% c("2012") &
    grepl(patternall, IDISpublications$authors)
)
## Write a CSV compliant table. UTF8 is used by default if fileEncoding is not
## specified. Two examples are created here.
## Check ?write.table in R console for more information.
# write.csv(idiscimus2012, file = "~/Dropbox/IDIS-devel/idis.cimus.2012.utf8.csv", row.names = FALSE)
# write.csv(idiscimus2012, file = "~/Dropbox/IDIS-devel/idis.cimus.2012.lat1.csv", row.names = FALSE, fileEncoding = "latin1")
## Write a XLSX file: maximum control over the process with library xlsx!
library(xlsx)
## NOTE(review): the output path is absolute and user-specific; adjust it
## before running on another machine.
write.xlsx2(
  idiscimus2012,
  file = "/Users/rrodriguez/Dropbox/IDIS-devel/idis.cimus.2012.xlsx",
  sheetName = "idis.cimus.2012",
  row.names = FALSE,
  showNA = FALSE
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
My commands
## Local variant: read the publications workbook from disk with XLConnect,
## keep the 2012 records whose author list matches a CIMUS researcher, write
## them to a semicolon-separated file and report how many rows matched.
library(XLConnect)
## NOTE(review): the dotted segments in both paths look like placeholders for
## the user's directory name; replace them before running.
IDIS.publications <- file.path("C:/Users/...../Desktop/IDIS-devel/IDIS.publications.xlsx")
## sheet = 1 reads the first worksheet of the workbook.
IDISpublications <- readWorksheetFromFile(IDIS.publications, sheet = 1)
## Regular expression covering the known spellings of CIMUS authors. One
## alternative per line, joined with "|" below. Accented vowels are written as
## optional groups such as "(á|a)?", and the parts of compound surnames may be
## separated by a hyphen, a space or nothing.
patternall <- paste(
  c(
    "Carracedo (Á|A)?",
    "Canedo A",
    "Di(é|e)?guez C",
    "Cadarso C",
    "Cadarso(-| )?Su(á|a)?rez C",
    "Pombo CM?",
    "(Á|A)?lvarez CV?",
    "Ara(ú|u)?jo(-| )?Vilar D",
    "(Á|A)?lvarez,? E",
    "(Á|A)?lvarez(-| )?Castro E",
    "Dom(í|i)?nguez F",
    # "[.]" matches a literal period; a bare "." would match any character.
    "F[.]? Dom(í|i)?nguez",
    "Gonz(á|a)?lez F",
    "Mart(í|i)?n F",
    "Guti(é|e)?rrez H",
    "Guti(é|e)?rrez(-| )?de(-| )?Ter(á|a)?n H",
    "Mart(í|i)?nez I",
    "Mart(í|i)?nez(-| )?Silva I",
    "Rodr(í|i)?guez(-| )?Pallares J",
    "Requena,? JR?",
    "Costoya JA?",
    "Lado J",
    "Labandeira(-| )?Garc(í|i)?a JL?",
    "Juanatey JR?G?",
    "Gonz(á|a)?lez(-| )?Juanatey JR?",
    "Ram(ó|o)?n Gonz(á|a)?lez(-| )?Juanatey J",
    "Zalvide J",
    "Maside X",
    "Lima L",
    "Garc(í|i)?a M",
    "Guerra MJ?",
    "Senar(í|i)?s R",
    "L(ó|o)?pez M",
    "P(é|e)?rez(-| )?Fern(á|a)?ndez R",
    "Nogueiras",
    "Seoane S",
    "Bravo SB?"
  ),
  collapse = "|"
)
## Keep rows published in 2012 whose authors field matches the pattern.
idiscimus2012 <- subset(
  IDISpublications,
  IDISpublications$yearPublication %in% c("2012") &
    grepl(patternall, IDISpublications$authors)
)
## row.names = FALSE keeps the header aligned with the data columns: with the
## default row names, write.table emits a header that is one field short.
write.table(
  idiscimus2012,
  file = "C:/Users/......./Desktop/IDIS-devel/idis.cimus.2012.csv",
  sep = ";",
  row.names = FALSE
)
## Number of matching publications.
nrow(idiscimus2012)