Last active
June 16, 2020 19:08
-
-
Save mathieubray/c7073f542f54b52d93247544521ad4a2 to your computer and use it in GitHub Desktop.
Extract Mendeley Notes Using R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RSQLite) # Database | |
library(dplyr) # This really should be loaded by default always... | |
library(tidyr) # 'spread' function to change table from 'long' to 'wide' format | |
# Uncomment the following line with your Mendeley path. | |
# See here for more information: http://support.mendeley.com/customer/en/portal/articles/227951-how-do-i-locate-mendeley-desktop-database-files-on-my-computer- | |
# mendeley.path = "C:/Users/{username}/AppData/Local/Mendeley Ltd./Mendeley Desktop/{youremail}@{emailclient.com}@www.mendeley.com.sqlite" | |
# Connect to the database | |
mendeley.connection = dbConnect(RSQLite::SQLite(),mendeley.path) | |
# Some of the tables available in the Mendeley database | |
head(dbListTables(mendeley.connection),n=10) | |
# The variables available in the 'Documents' table | |
dbListFields(mendeley.connection,"Documents") | |
extract.table <- function(con,query){ | |
res <- dbSendQuery(con,query) # Send query | |
table <- dbFetch(res) # Fetch table | |
dbClearResult(res) # Free resources | |
return(table) | |
} | |
### Folder | |
dbListFields(mendeley.connection,"Folders") | |
folders <- extract.table(mendeley.connection, "SELECT id, name FROM Folders") | |
dbListFields(mendeley.connection,"DocumentFolders") | |
document.folders <- extract.table(mendeley.connection, "SELECT folderId, documentId FROM DocumentFolders") | |
# Connect to the database | |
mendeley.connection = dbConnect(RSQLite::SQLite(),mendeley.path) | |
# Some of the tables available in the Mendeley database | |
head(dbListTables(mendeley.connection),n=10) | |
# The variables available in the 'Documents' table | |
dbListFields(mendeley.connection,"Documents") | |
extract.table <- function(con,query){ | |
res <- dbSendQuery(con,query) # Send query | |
table <- dbFetch(res) # Fetch table | |
dbClearResult(res) # Free resources | |
return(table) | |
} | |
### Folder | |
dbListFields(mendeley.connection,"Folders") | |
folders <- extract.table(mendeley.connection, "SELECT id, name FROM Folders") | |
dbListFields(mendeley.connection,"DocumentFolders") | |
document.folders <- extract.table(mendeley.connection, "SELECT folderId, documentId FROM DocumentFolders") | |
relevant.folder.name <- "Networks" | |
# Extract interal ID for folder of interest | |
relevant.folder.id <- (folders %>% | |
filter(name == relevant.folder.name))$id[1] | |
# Extract internal IDs for all papers belonging to the folder of interest, using the folder ID | |
relevant.papers <- (document.folders %>% | |
filter(folderId == relevant.folder.id))$documentId | |
head(relevant.papers) | |
### Documents | |
# Collect title and citation key for all relevant documents | |
relevant.documents <- extract.table(mendeley.connection,"SELECT id, title, citationKey FROM Documents") %>% | |
filter(id %in% relevant.papers) %>% | |
rename(documentId = id) | |
### Authors | |
dbListFields(mendeley.connection,"DocumentContributors") | |
# Collect and concatenate authors for all relevant documents | |
authors <- extract.table(mendeley.connection,"SELECT * FROM DocumentContributors") | |
head(authors) | |
unique(authors$contribution) | |
# Collect and concatenate authors for all relevant documents | |
relevant.authors <- authors %>% | |
filter(contribution == "DocumentAuthor", | |
documentId %in% relevant.papers) %>% | |
mutate(fullName = paste(lastName,firstNames,sep=", ")) %>% # Concatenate first and last name | |
select(documentId,fullName) %>% | |
group_by(documentId) %>% | |
summarize(authorsNames = paste(fullName,collapse="; ")) # Concatenate all authors of a document together | |
### Tags | |
dbListFields(mendeley.connection,"DocumentTags") | |
# Collect and concatenate tags for all relevant documents | |
relevant.tags <- extract.table(mendeley.connection, "SELECT * FROM DocumentTags") %>% | |
filter(documentId %in% relevant.papers) %>% | |
group_by(documentId) %>% | |
summarize(tagList = paste(tag,collapse="; ")) | |
### Notes | |
dbListFields(mendeley.connection,"FileNotes") | |
# Collect notes | |
relevant.notes <- extract.table(mendeley.connection,"SELECT documentId, note FROM FileNotes") %>% | |
filter(documentId %in% relevant.papers) %>% | |
rowwise() %>% | |
mutate(type = tolower(trimws(unlist(strsplit(note,split=":")))[1]), # Extract for each document whether it is a 'Goal' or a 'Key' | |
note = paste(unlist(strsplit(note,split=":"))[-1])) %>% # Extract the actual note | |
ungroup() %>% | |
spread(type,note) | |
### Final Dataset | |
# Join the datasets together | |
relevant.files <- relevant.documents %>% | |
left_join(relevant.authors, by="documentId") %>% | |
left_join(relevant.tags, by="documentId") %>% | |
left_join(relevant.notes, by="documentId") | |
head(relevant.files) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment