crazyhottommy/Entrez_Direct.sh

## Entrez_Direct.sh
# search PubMed for records containing "glioblastoma enhancer"
$esearch -db pubmed -query "glioblastoma enhancer"
<ENTREZ_DIRECT>
  <Db>pubmed</Db>
  <WebEnv>NCID_1_539964707_130.14.18.34_9001_1422280320_2091337226_0MetA0_S_MegaStore_F_1</WebEnv>
  <QueryKey>1</QueryKey>
  <Count>97</Count>
  <Step>1</Step>
</ENTREZ_DIRECT>

# search PubMed for titles containing "glioblastoma enhancer"; this returned a count of 0
$esearch -db pubmed -query "glioblastoma enhancer [TITL]"
<ENTREZ_DIRECT>
  <Db>pubmed</Db>
  <WebEnv>NCID_1_23683635_130.14.22.215_9001_1422280849_1465220088_0MetA0_S_MegaStore_F_1</WebEnv>
  <QueryKey>1</QueryKey>
  <Count>0</Count>
  <Step>1</Step>
</ENTREZ_DIRECT>

# fetch the abstracts
$esearch -db pubmed -query "glioblastoma enhancer" | efetch -format abstract > glioblastoma.txt

#check the abstracts
$ less -S glioblastoma.txt
# how many papers?
$cat glioblastoma.txt | grep PMID | wc -l
97

# fetch the protein sequences of human CTCF
$esearch -db protein  -query "Homo sapiens [ORGN] AND CTCF[GENE]" | efetch -format fasta > CTCF_protein.fa
# fetch the nucleotide sequences of human CTCF
$esearch -db nucleotide  -query "Homo sapiens [ORGN] AND CTCF[GENE]" | efetch -format fasta > CTCF_nucleotide.fa
# in GenBank format
$esearch -db nucleotide  -query "Homo sapiens [ORGN] AND CTCF[GENE]" | efetch -format gb > CTCF_nucleotide.gb

# From a Biostars post: https://www.biostars.org/p/92671/
# Given a Gene ID, download the amino acid sequences of the corresponding proteins, keeping only the reviewed entries (e.g. no putative or predicted sequences):
$esearch -db gene -query "1234[id]" | elink -target protein | efilter -query "REVIEWED[FILTER]"| efetch -format fasta

#Given a file containing a list of Gene IDs (one per line), download all the entries in tabular format:
$esearch -db gene -query $(paste -s -d ','  mygenes.ids) | efetch -format tabular > mygenes.details.txt
	# search PubMed for records containing "glioblastoma enhancer"
	$esearch -db pubmed -query "glioblastoma enhancer"
	<ENTREZ_DIRECT>
	<Db>pubmed</Db>
	<WebEnv>NCID_1_539964707_130.14.18.34_9001_1422280320_2091337226_0MetA0_S_MegaStore_F_1</WebEnv>
	<QueryKey>1</QueryKey>
	<Count>97</Count>
	<Step>1</Step>
	</ENTREZ_DIRECT>

	# search PubMed for titles containing "glioblastoma enhancer"; this returned a count of 0
	$esearch -db pubmed -query "glioblastoma enhancer [TITL]"
	<ENTREZ_DIRECT>
	<Db>pubmed</Db>
	<WebEnv>NCID_1_23683635_130.14.22.215_9001_1422280849_1465220088_0MetA0_S_MegaStore_F_1</WebEnv>
	<QueryKey>1</QueryKey>
	<Count>0</Count>
	<Step>1</Step>
	</ENTREZ_DIRECT>

	# fetch the abstracts
	$esearch -db pubmed -query "glioblastoma enhancer" | efetch -format abstract > glioblastoma.txt

	#check the abstracts
	$ less -S glioblastoma.txt
	# how many papers?
	$cat glioblastoma.txt | grep PMID | wc -l
	97

	# fetch the protein sequences of human CTCF
	$esearch -db protein -query "Homo sapiens [ORGN] AND CTCF[GENE]" | efetch -format fasta > CTCF_protein.fa
	# fetch the nucleotide sequences of human CTCF
	$esearch -db nucleotide -query "Homo sapiens [ORGN] AND CTCF[GENE]" | efetch -format fasta > CTCF_nucleotide.fa
	# in GenBank format
	$esearch -db nucleotide -query "Homo sapiens [ORGN] AND CTCF[GENE]" | efetch -format gb > CTCF_nucleotide.gb

	# From a Biostars post: https://www.biostars.org/p/92671/
	# Given a Gene ID, download the amino acid sequences of the corresponding proteins, keeping only the reviewed entries (e.g. no putative or predicted sequences):
	$esearch -db gene -query "1234[id]" | elink -target protein | efilter -query "REVIEWED[FILTER]"| efetch -format fasta

	#Given a file containing a list of Gene IDs (one per line), download all the entries in tabular format:
	$esearch -db gene -query $(paste -s -d ',' mygenes.ids) | efetch -format tabular > mygenes.details.txt