Skip to content

Instantly share code, notes, and snippets.

@kumeS
Last active July 10, 2020 10:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kumeS/77f9f3797650be80535ca296d292e663 to your computer and use it in GitHub Desktop.
Save kumeS/77f9f3797650be80535ca296d292e663 to your computer and use it in GitHub Desktop.
Search the PubMed database and translate the abstracts into Japanese.
##RISmed
#https://cran.r-project.org/web/packages/RISmed/index.html
#RISmed: NCBIデータベースからコンテンツをダウンロードするRパッケージである。
#PubMedを含むNational Center for Biotechnology Information (NCBI)
#のデータベースから書誌内容を抽出するためのツール群である。
#参考文献
#https://rpubs.com/Algo1970/116830
#https://datascienceplus.com/search-pubmed-with-rismed/
#https://amunategui.github.io/pubmed-query/
#https://stackoverflow.com/questions/30446905/rmarkdown-font-size-and-header/30447045
##plotly.com
#https://plotly.com/r/
######################################################################
if(!require("RISmed")){install.packages("RISmed")}; library(RISmed)
if(!require("magrittr")){install.packages("magrittr")}; library(magrittr)
if(!require("purrr")){install.packages("purrr")}; library(purrr)
if(!require("plotly")){install.packages("plotly")}; library(plotly)
if(!require("progress")){install.packages("progress")}; library(progress)
if(!require("reticulate")){install.packages("reticulate")}; library(reticulate)
########################################
# Keyword search across the whole of PubMed
SearchTerm <- "3d electron microscopy"
esearchResults <- EUtilsSummary(
  query = SearchTerm, type = "esearch", db = "pubmed", retmax = 10000
)
summary(esearchResults)
str(esearchResults)

# Same keyword search, restricted to articles dated 2020
Year <- 2020
SearchTerm <- "3d electron microscopy"
esearchResults2020 <- EUtilsSummary(
  query = SearchTerm, type = "esearch", db = "pubmed",
  mindate = Year, maxdate = Year, retmax = 10000
)

# Number of hits
esearchResults2020@count

# Print the PMIDs of the hits
PubID <- QueryId(esearchResults2020)
PubID

# Fetch the record of the first article
PubIDResults <- EUtilsGet(PubID[1], type = "efetch", db = "pubmed")

# Inspect the MeSH headings
Mesh(PubIDResults)
#[[1]]
#[1] NA
# (when this was written, the article apparently had no MeSH headings assigned)
PubIDResults
#str(PubIDResults)

# Collect the fields of interest in a list; later code reads them by position
Results <- list(
  PubIDResults@PMID,
  PubIDResults@Author,
  PubIDResults@Title,
  PubIDResults@ArticleTitle,
  PubIDResults@AbstractText
)
## Count PubMed articles per publication year
# Return the number of PubMed records matching a query within a single year.
#
# Args:
#   i:     Publication year; passed as both `mindate` and `maxdate`.
#   query: Search string. Defaults to the variable `terms` visible from this
#          function's environment, so the one-argument form `PubNumber(year)`
#          keeps working for existing callers.
#
# Returns:
#   The hit count reported by QueryCount() for the search.
PubNumber <- function(i, query = terms) {
  # When no `terms` variable has been defined, the lookup typically resolves
  # to the stats::terms() function; reject that instead of sending it to NCBI.
  if (!is.character(query) || length(query) != 1L || is.na(query)) {
    stop(
      "`query` must be a single character string; define `terms` or pass `query`.",
      call. = FALSE
    )
  }
  r <- EUtilsSummary(query, type = "esearch", db = "pubmed",
                     mindate = i, maxdate = i)
  QueryCount(r)
}
# Yearly PubMed article counts for "Coronavirus"
terms <- "Coronavirus"
Years <- 1970:2020
PubNum <- lapply(Years, PubNumber)
Data <- data.frame(Years, PubNumber = unlist(PubNum))
fig <- plot_ly(
  Data,
  x = ~Years,
  y = ~PubNumber,
  type = "bar",
  name = "Publications",
  marker = list(
    color = "rgb(158,202,225)",
    line = list(color = "rgb(8,48,107)", width = 1)
  )
)
fig <- layout(
  fig,
  title = paste0("Number of PubMed articles containing ", terms),
  yaxis = list(title = "Count"),
  xaxis = list(title = "Year")
)
fig

# Yearly PubMed article counts for "deep learning"
terms <- "deep learning"
Years <- 1970:2020
PubNum <- lapply(Years, PubNumber)
Data <- data.frame(Years, PubNumber = unlist(PubNum))
fig <- plot_ly(
  Data,
  x = ~Years,
  y = ~PubNumber,
  type = "bar",
  name = "Publications",
  marker = list(
    color = "rgb(158,202,225)",
    line = list(color = "rgb(8,48,107)", width = 1)
  )
)
fig <- layout(
  fig,
  title = paste0("Number of PubMed articles containing ", terms),
  yaxis = list(title = "Count"),
  xaxis = list(title = "Year")
)
fig
# Plot the number of PubMed articles matching a search term, year by year.
#
# Runs one esearch query per year (with a progress bar) and draws the counts
# as a plotly bar chart.
#
# Args:
#   Term:  Search string sent to PubMed (single character string).
#   Years: Numeric vector of publication years to query; each year is used as
#          both `mindate` and `maxdate`. Defaults to 1970:2020.
#
# Returns:
#   A plotly figure with one bar per year.
PublicationPerYear <- function(Term = "Coronavirus", Years = 1970:2020) {
  stopifnot(
    is.character(Term), length(Term) == 1L, !is.na(Term),
    is.numeric(Years), length(Years) > 0L, !anyNA(Years)
  )

  pb <- progress::progress_bar$new(
    format = " Process [:bar] :percent eta: :eta",
    total = length(Years), clear = FALSE, width = 50
  )

  # One query per year; vapply() allocates the result once and guarantees a
  # numeric vector with exactly one count per year.
  counts <- vapply(
    Years,
    function(year) {
      r <- RISmed::EUtilsSummary(Term, type = "esearch", db = "pubmed",
                                 mindate = year, maxdate = year)
      n <- as.numeric(RISmed::QueryCount(r))
      pb$tick()
      n
    },
    numeric(1)
  )

  Data <- data.frame(Years = Years, PubNumber = counts)

  # Namespace-qualified calls: the function no longer attaches plotly as a
  # side effect and does not need the pipe operator to be in scope.
  fig <- plotly::plot_ly(
    Data,
    x = ~Years,
    y = ~PubNumber,
    type = "bar",
    name = "Publications",
    marker = list(
      color = "rgb(158,202,225)",
      line = list(color = "rgb(8,48,107)", width = 1)
    )
  )
  plotly::layout(
    fig,
    title = paste0("Number of PubMed articles containing ", Term),
    yaxis = list(title = "Count"),
    xaxis = list(title = "Year")
  )
}
#source("https://gist.githubusercontent.com/kumeS/a9e612c6bb484451e6328f119fd9ef56/raw/481884897a54e51bc67b7dbd794fcfe9404a8166/PublicationPerYear.R")
PublicationPerYear(Term = "deep learning")
########################################
# Main topics
# PubMed ID + abstract lookup + translation + CutText
########################################
translatedResults <- Results

# googletrans is a Python module reached through reticulate.
# NOTE(review): the interpreter path is machine-specific - adjust if needed.
reticulate::use_python("/usr/local/bin/python", required = TRUE)
gt <- reticulate::import(module = "googletrans")$Translator()

# Remove HTML-escaped double quotes from the abstract before translating it
TEXT01 <- gsub("&quot;", "", translatedResults[[5]])

# Translate element 3, element 4 and the cleaned abstract from English to
# Japanese
translateResults01 <- gt$translate(text = Results[[3]], src = "en", dest = "ja")
translateResults02 <- gt$translate(text = Results[[4]], src = "en", dest = "ja")
translateResults03 <- gt$translate(text = TEXT01, src = "en", dest = "ja")
# Extract the translated text from the character form of a translation result.
#
# The input is converted with as.character() and is expected to look like
# "..., text=<translation>, pronunciation=...". The part between the first
# ", text=" marker and the next ", pronunciation=" marker is returned.
#
# Args:
#   x: Object whose character representation contains the two markers.
#
# Returns:
#   A single string with the text between the markers; NA_character_ when the
#   input is empty, NA, or has no ", text=" marker. If the closing marker is
#   missing, everything after ", text=" is returned.
CutText <- function(x) {
  repr <- as.character(x)
  # Empty input used to fail with "subscript out of bounds"
  if (length(repr) == 0L || is.na(repr[1])) {
    return(NA_character_)
  }
  repr <- repr[1]

  start_tag <- ", text="
  end_tag <- ", pronunciation="

  # Fixed-string search for the FIRST start marker, so a translation that
  # itself contains ", text=" is not truncated.
  start <- as.integer(regexpr(start_tag, repr, fixed = TRUE))
  if (start < 0L) {
    return(NA_character_)
  }
  rest <- substring(repr, start + nchar(start_tag))

  end <- as.integer(regexpr(end_tag, rest, fixed = TRUE))
  if (end < 0L) {
    return(rest)
  }
  substr(rest, 1L, end - 1L)
}
# Store each Japanese translation as a second element next to the original text
translatedResults[[3]][2] <- CutText(translateResults01)
translatedResults[[4]][2] <- CutText(translateResults02)
translatedResults[[5]][2] <- CutText(translateResults03)
translatedResults

# Fetch the report template. download.file() is used instead of shelling out
# to wget: it works on systems without wget, overwrites an existing copy
# rather than saving a numbered duplicate, and its status can be checked.
report_url <- "https://gist.githubusercontent.com/kumeS/8a4410c7f9d07a0b49192bcda4bf1e03/raw/b8db496b0c13fa98affd654f03d45e17cf41ea08/Basic_report_jpn.Rmd"
download_status <- utils::download.file(
  report_url,
  destfile = "Basic_report_jpn.Rmd",
  mode = "wb"
)
if (download_status != 0L) {
  stop("Could not download Basic_report_jpn.Rmd", call. = FALSE)
}

# Render the template to HTML and open the result in the browser
rmarkdown::render(
  "Basic_report_jpn.Rmd",
  output_format = "html_document",
  output_file = "output.html"
)
browseURL("output.html")
##### Report output
### https://kumes.github.io/Blog/Search_PubMed/report_output.html
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment