#!/usr/bin/env bash
#
# download_kegg.sh
#
# Fetches the KEGG organism list, keeps a hand-picked set of organisms, and
# downloads the KGML file of every pathway listed for each of them.
#
# Files written in the current directory:
#   organisms-all.txt                  full response of /list/organism
#   organisms-of-interest.txt          rows whose 2nd field is a wanted code
#   organisms-of-interest-codes.txt    1st tab-separated column of those rows
#   scrape-kegg-2-curl-loops.sh        generated download script (one line per organism)
#   kgml/<pathway>.xml                 one KGML file per pathway
#
# Requires: curl, awk, cut, and network access to rest.kegg.jp.

# Abort on the first failing step so a failed download cannot silently feed
# empty input into the later steps.
set -euo pipefail

# Get a list of all organisms.
# --fail turns HTTP errors into a non-zero exit instead of saving the error
# page; -sS stays quiet but still prints the error message.
curl -fsS "http://rest.kegg.jp/list/organism" > organisms-all.txt

# Get just a few of interest: rows whose second field is exactly one of the
# organism codes listed below.
awk '$2~/^(hsa|mmu|rno|cfa|bta|gga|xla|xtr|dre|dme|cel|ath|ehi|tgo|eco|sau|mtu|mav|cje|ccol)$/' \
  organisms-all.txt > organisms-of-interest.txt

# Get the accession codes for each (first tab-separated column).
# NOTE(review): presumably this column is the KEGG T-number accession - confirm.
cut -f1 organisms-of-interest.txt > organisms-of-interest-codes.txt

# Stop early when the filter matched nothing; otherwise the generated script
# would be empty and the run would look successful while downloading nothing.
if [[ ! -s organisms-of-interest-codes.txt ]]; then
  printf 'download_kegg.sh: no organisms of interest found in organisms-all.txt\n' >&2
  exit 1
fi

# Make a directory to put all the kgml files downloaded
mkdir -p kgml

# Write a script that will download all the kgml files for all the organisms
# of interest. $code is expanded now, while the script is generated; the
# escaped \${path} is left for the generated script to expand at run time.
# The generated curl calls use --fail so an HTTP error is reported instead of
# the error body being stored as a .xml file.
while read -r code; do
  printf '%s\n' "curl -f 'http://rest.kegg.jp/list/pathway/${code}' | cut -f1 | while read -r path; do curl -f -o \"kgml/\${path}.xml\" \"http://rest.kegg.jp/get/\${path}/kgml\"; done"
done < organisms-of-interest-codes.txt > scrape-kegg-2-curl-loops.sh

# Run it. This issues one request per pathway and can take a long time, so
# start it inside tmux (or similar) to survive a dropped terminal.
bash scrape-kegg-2-curl-loops.sh
	# NOTE(review): this tab-indented section repeats the download pipeline that
	# appears earlier in the file. Its pipes had been written as "\|", which looks
	# like a Markdown-escaping paste artifact - confirm whether the section should
	# exist at all. The pipes are restored here so that it parses and filters.

	# Get a list of all organisms.
	# --fail makes curl exit non-zero on an HTTP error instead of saving the
	# error page; stop here because every later step depends on this file.
	curl -fsS "http://rest.kegg.jp/list/organism" > organisms-all.txt || {
		printf 'failed to download the KEGG organism list\n' >&2
		exit 1
	}

	# Get just a few of interest: rows whose second field is exactly one of the
	# organism codes below. The alternation needs a bare "|"; an escaped "\|"
	# would match a literal pipe character instead.
	awk '$2~/^(hsa|mmu|rno|cfa|bta|gga|xla|xtr|dre|dme|cel|ath|ehi|tgo|eco|sau|mtu|mav|cje|ccol)$/' \
		organisms-all.txt > organisms-of-interest.txt

	# Get the accession codes for each (first tab-separated column)
	cut -f1 organisms-of-interest.txt > organisms-of-interest-codes.txt

	# Make a directory to put all the kgml files downloaded
	mkdir -p kgml

	# Write a script that will download all the kgml files for all the organisms
	# of interest. $code is expanded while generating; the escaped \${path} is
	# expanded later by the generated script itself.
	while read -r code; do
		printf '%s\n' "curl -f 'http://rest.kegg.jp/list/pathway/${code}' | cut -f1 | while read -r path; do curl -f -o \"kgml/\${path}.xml\" \"http://rest.kegg.jp/get/\${path}/kgml\"; done"
	done < organisms-of-interest-codes.txt > scrape-kegg-2-curl-loops.sh

	# Get in tmux and run it (one request per pathway, so it is long-running)
	bash scrape-kegg-2-curl-loops.sh