Extract Mendeley Notes Using R
library(RSQLite) # Database | |
library(dplyr) # This really should be loaded by default always... | |
library(tidyr) # 'spread' function to change table from 'long' to 'wide' format | |
# Uncomment the following line with your Mendeley path. | |
# See here for more information: http://support.mendeley.com/customer/en/portal/articles/227951-how-do-i-locate-mendeley-desktop-database-files-on-my-computer- | |
# mendeley.path = "C:/Users/{username}/AppData/Local/Mendeley Ltd./Mendeley Desktop/{youremail}@{emailclient.com}@www.mendeley.com.sqlite" | |
# Connect to the database | |
mendeley.connection = dbConnect(RSQLite::SQLite(),mendeley.path) | |
# Some of the tables available in the Mendeley database | |
head(dbListTables(mendeley.connection),n=10) | |
# The variables available in the 'Documents' table | |
dbListFields(mendeley.connection,"Documents") | |
extract.table <- function(con,query){ | |
res <- dbSendQuery(con,query) # Send query | |
table <- dbFetch(res) # Fetch table | |
dbClearResult(res) # Free resources | |
return(table) | |
} | |
### Folder | |
dbListFields(mendeley.connection,"Folders") | |
folders <- extract.table(mendeley.connection, "SELECT id, name FROM Folders") | |
dbListFields(mendeley.connection,"DocumentFolders") | |
document.folders <- extract.table(mendeley.connection, "SELECT folderId, documentId FROM DocumentFolders") | |
# Connect to the database | |
mendeley.connection = dbConnect(RSQLite::SQLite(),mendeley.path) | |
# Some of the tables available in the Mendeley database | |
head(dbListTables(mendeley.connection),n=10) | |
# The variables available in the 'Documents' table | |
dbListFields(mendeley.connection,"Documents") | |
extract.table <- function(con,query){ | |
res <- dbSendQuery(con,query) # Send query | |
table <- dbFetch(res) # Fetch table | |
dbClearResult(res) # Free resources | |
return(table) | |
} | |
### Folder | |
dbListFields(mendeley.connection,"Folders") | |
folders <- extract.table(mendeley.connection, "SELECT id, name FROM Folders") | |
dbListFields(mendeley.connection,"DocumentFolders") | |
document.folders <- extract.table(mendeley.connection, "SELECT folderId, documentId FROM DocumentFolders") | |
relevant.folder.name <- "Networks" | |
# Extract interal ID for folder of interest | |
relevant.folder.id <- (folders %>% | |
filter(name == relevant.folder.name))$id[1] | |
# Extract internal IDs for all papers belonging to the folder of interest, using the folder ID | |
relevant.papers <- (document.folders %>% | |
filter(folderId == relevant.folder.id))$documentId | |
head(relevant.papers) | |
### Documents | |
# Collect title and citation key for all relevant documents | |
relevant.documents <- extract.table(mendeley.connection,"SELECT id, title, citationKey FROM Documents") %>% | |
filter(id %in% relevant.papers) %>% | |
rename(documentId = id) | |
### Authors | |
dbListFields(mendeley.connection,"DocumentContributors") | |
# Collect and concatenate authors for all relevant documents | |
authors <- extract.table(mendeley.connection,"SELECT * FROM DocumentContributors") | |
head(authors) | |
unique(authors$contribution) | |
# Collect and concatenate authors for all relevant documents | |
relevant.authors <- authors %>% | |
filter(contribution == "DocumentAuthor", | |
documentId %in% relevant.papers) %>% | |
mutate(fullName = paste(lastName,firstNames,sep=", ")) %>% # Concatenate first and last name | |
select(documentId,fullName) %>% | |
group_by(documentId) %>% | |
summarize(authorsNames = paste(fullName,collapse="; ")) # Concatenate all authors of a document together | |
### Tags | |
dbListFields(mendeley.connection,"DocumentTags") | |
# Collect and concatenate tags for all relevant documents | |
relevant.tags <- extract.table(mendeley.connection, "SELECT * FROM DocumentTags") %>% | |
filter(documentId %in% relevant.papers) %>% | |
group_by(documentId) %>% | |
summarize(tagList = paste(tag,collapse="; ")) | |
### Notes | |
dbListFields(mendeley.connection,"FileNotes") | |
# Collect notes | |
relevant.notes <- extract.table(mendeley.connection,"SELECT documentId, note FROM FileNotes") %>% | |
filter(documentId %in% relevant.papers) %>% | |
rowwise() %>% | |
mutate(type = tolower(trimws(unlist(strsplit(note,split=":")))[1]), # Extract for each document whether it is a 'Goal' or a 'Key' | |
note = paste(unlist(strsplit(note,split=":"))[-1])) %>% # Extract the actual note | |
ungroup() %>% | |
spread(type,note) | |
### Final Dataset | |
# Join the datasets together | |
relevant.files <- relevant.documents %>% | |
left_join(relevant.authors, by="documentId") %>% | |
left_join(relevant.tags, by="documentId") %>% | |
left_join(relevant.notes, by="documentId") | |
head(relevant.files) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment