mathieubray/MendeleyExtractor.R

## MendeleyExtractor.R
library(RSQLite) # Database
library(dplyr) # This really should be loaded by default always...
library(tidyr) # 'spread' function to change table from 'long' to 'wide' format

# Uncomment the following line with your Mendeley path.
# See here for more information: http://support.mendeley.com/customer/en/portal/articles/227951-how-do-i-locate-mendeley-desktop-database-files-on-my-computer-

# mendeley.path = "C:/Users/{username}/AppData/Local/Mendeley Ltd./Mendeley Desktop/{youremail}@{emailclient.com}@www.mendeley.com.sqlite"

# Connect to the database
mendeley.connection = dbConnect(RSQLite::SQLite(),mendeley.path)

# Some of the tables available in the Mendeley database
head(dbListTables(mendeley.connection),n=10)

# The variables available in the 'Documents' table
dbListFields(mendeley.connection,"Documents")


extract.table <- function(con,query){

  res <- dbSendQuery(con,query) # Send query

  table <- dbFetch(res) # Fetch table

  dbClearResult(res) # Free resources

  return(table)

}

### Folder

dbListFields(mendeley.connection,"Folders")

folders <- extract.table(mendeley.connection, "SELECT id, name FROM Folders")

dbListFields(mendeley.connection,"DocumentFolders")

document.folders <- extract.table(mendeley.connection, "SELECT folderId, documentId FROM DocumentFolders")

# Connect to the database
mendeley.connection = dbConnect(RSQLite::SQLite(),mendeley.path)

# Some of the tables available in the Mendeley database
head(dbListTables(mendeley.connection),n=10)

# The variables available in the 'Documents' table
dbListFields(mendeley.connection,"Documents")


extract.table <- function(con,query){

  res <- dbSendQuery(con,query) # Send query

  table <- dbFetch(res) # Fetch table

  dbClearResult(res) # Free resources

  return(table)

}

### Folder

dbListFields(mendeley.connection,"Folders")

folders <- extract.table(mendeley.connection, "SELECT id, name FROM Folders")

dbListFields(mendeley.connection,"DocumentFolders")

document.folders <- extract.table(mendeley.connection, "SELECT folderId, documentId FROM DocumentFolders")

relevant.folder.name <- "Networks"

# Extract interal ID for folder of interest
relevant.folder.id <- (folders %>%
                         filter(name == relevant.folder.name))$id[1]

# Extract internal IDs for all papers belonging to the folder of interest, using the folder ID
relevant.papers <- (document.folders %>%
                      filter(folderId == relevant.folder.id))$documentId

head(relevant.papers)


### Documents

# Collect title and citation key for all relevant documents
relevant.documents <- extract.table(mendeley.connection,"SELECT id, title, citationKey FROM Documents") %>%
  filter(id %in% relevant.papers) %>%
  rename(documentId = id)


### Authors

dbListFields(mendeley.connection,"DocumentContributors")

# Collect and concatenate authors for all relevant documents
authors <- extract.table(mendeley.connection,"SELECT * FROM DocumentContributors")

head(authors)

unique(authors$contribution)

# Collect and concatenate authors for all relevant documents
relevant.authors <- authors %>%
  filter(contribution == "DocumentAuthor",
         documentId %in% relevant.papers) %>%
  mutate(fullName = paste(lastName,firstNames,sep=", ")) %>% # Concatenate first and last name
  select(documentId,fullName) %>%
  group_by(documentId) %>%
  summarize(authorsNames = paste(fullName,collapse="; ")) # Concatenate all authors of a document together


### Tags

dbListFields(mendeley.connection,"DocumentTags")

# Collect and concatenate tags for all relevant documents
relevant.tags <- extract.table(mendeley.connection, "SELECT * FROM DocumentTags") %>%
  filter(documentId %in% relevant.papers) %>%
  group_by(documentId) %>%
  summarize(tagList = paste(tag,collapse="; "))


### Notes

dbListFields(mendeley.connection,"FileNotes")

# Collect notes
relevant.notes <- extract.table(mendeley.connection,"SELECT documentId, note FROM FileNotes") %>%
  filter(documentId %in% relevant.papers) %>%
  rowwise() %>%
  mutate(type = tolower(trimws(unlist(strsplit(note,split=":")))[1]), # Extract for each document whether it is a 'Goal' or a 'Key'
         note = paste(unlist(strsplit(note,split=":"))[-1])) %>% # Extract the actual note
  ungroup() %>%
  spread(type,note)


### Final Dataset

# Join the datasets together
relevant.files <- relevant.documents %>%
  left_join(relevant.authors, by="documentId") %>%
  left_join(relevant.tags, by="documentId") %>%
  left_join(relevant.notes, by="documentId")

head(relevant.files)
	library(RSQLite) # Database
	library(dplyr) # This really should be loaded by default always...
	library(tidyr) # 'spread' function to change table from 'long' to 'wide' format

	# Uncomment the following line with your Mendeley path.
	# See here for more information: http://support.mendeley.com/customer/en/portal/articles/227951-how-do-i-locate-mendeley-desktop-database-files-on-my-computer-

	# mendeley.path = "C:/Users/{username}/AppData/Local/Mendeley Ltd./Mendeley Desktop/{youremail}@{emailclient.com}@www.mendeley.com.sqlite"

	# Connect to the database
	mendeley.connection = dbConnect(RSQLite::SQLite(),mendeley.path)

	# Some of the tables available in the Mendeley database
	head(dbListTables(mendeley.connection),n=10)

	# The variables available in the 'Documents' table
	dbListFields(mendeley.connection,"Documents")


	extract.table <- function(con,query){

	res <- dbSendQuery(con,query) # Send query

	table <- dbFetch(res) # Fetch table

	dbClearResult(res) # Free resources

	return(table)

	}

	### Folder

	dbListFields(mendeley.connection,"Folders")

	folders <- extract.table(mendeley.connection, "SELECT id, name FROM Folders")

	dbListFields(mendeley.connection,"DocumentFolders")

	document.folders <- extract.table(mendeley.connection, "SELECT folderId, documentId FROM DocumentFolders")

	# Connect to the database
	mendeley.connection = dbConnect(RSQLite::SQLite(),mendeley.path)

	# Some of the tables available in the Mendeley database
	head(dbListTables(mendeley.connection),n=10)

	# The variables available in the 'Documents' table
	dbListFields(mendeley.connection,"Documents")


	extract.table <- function(con,query){

	res <- dbSendQuery(con,query) # Send query

	table <- dbFetch(res) # Fetch table

	dbClearResult(res) # Free resources

	return(table)

	}

	### Folder

	dbListFields(mendeley.connection,"Folders")

	folders <- extract.table(mendeley.connection, "SELECT id, name FROM Folders")

	dbListFields(mendeley.connection,"DocumentFolders")

	document.folders <- extract.table(mendeley.connection, "SELECT folderId, documentId FROM DocumentFolders")

	relevant.folder.name <- "Networks"

	# Extract interal ID for folder of interest
	relevant.folder.id <- (folders %>%
	filter(name == relevant.folder.name))$id[1]

	# Extract internal IDs for all papers belonging to the folder of interest, using the folder ID
	relevant.papers <- (document.folders %>%
	filter(folderId == relevant.folder.id))$documentId

	head(relevant.papers)


	### Documents

	# Collect title and citation key for all relevant documents
	relevant.documents <- extract.table(mendeley.connection,"SELECT id, title, citationKey FROM Documents") %>%
	filter(id %in% relevant.papers) %>%
	rename(documentId = id)


	### Authors

	dbListFields(mendeley.connection,"DocumentContributors")

	# Collect and concatenate authors for all relevant documents
	authors <- extract.table(mendeley.connection,"SELECT * FROM DocumentContributors")

	head(authors)

	unique(authors$contribution)

	# Collect and concatenate authors for all relevant documents
	relevant.authors <- authors %>%
	filter(contribution == "DocumentAuthor",
	documentId %in% relevant.papers) %>%
	mutate(fullName = paste(lastName,firstNames,sep=", ")) %>% # Concatenate first and last name
	select(documentId,fullName) %>%
	group_by(documentId) %>%
	summarize(authorsNames = paste(fullName,collapse="; ")) # Concatenate all authors of a document together


	### Tags

	dbListFields(mendeley.connection,"DocumentTags")

	# Collect and concatenate tags for all relevant documents
	relevant.tags <- extract.table(mendeley.connection, "SELECT * FROM DocumentTags") %>%
	filter(documentId %in% relevant.papers) %>%
	group_by(documentId) %>%
	summarize(tagList = paste(tag,collapse="; "))


	### Notes

	dbListFields(mendeley.connection,"FileNotes")

	# Collect notes
	relevant.notes <- extract.table(mendeley.connection,"SELECT documentId, note FROM FileNotes") %>%
	filter(documentId %in% relevant.papers) %>%
	rowwise() %>%
	mutate(type = tolower(trimws(unlist(strsplit(note,split=":")))[1]), # Extract for each document whether it is a 'Goal' or a 'Key'
	note = paste(unlist(strsplit(note,split=":"))[-1])) %>% # Extract the actual note
	ungroup() %>%
	spread(type,note)


	### Final Dataset

	# Join the datasets together
	relevant.files <- relevant.documents %>%
	left_join(relevant.authors, by="documentId") %>%
	left_join(relevant.tags, by="documentId") %>%
	left_join(relevant.notes, by="documentId")

	head(relevant.files)