Last active
February 28, 2017 02:44
-
-
Save ghuertaramos/f043a216138e6605560512e1a7d77350 to your computer and use it in GitHub Desktop.
Script to count the amino acids in a protein sequence
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Counts the amino acids in a series of proteins chosen by the user.
#
# Input:  "input.txt" in the current directory, containing the NCBI keys of
#         the desired proteins separated by whitespace
#         (e.g. AAA84121.1 BAA83246.1).
# Output: Amino/<key>.fasta  one intermediate file per key holding only the
#                            amino-acid sequence (header and line breaks
#                            removed)
#         Amino/conteo.txt   the final file: character counts of the
#                            intermediate files as printed by `wc -m`

set -eo pipefail

readonly INPUT_FILE="input.txt"
readonly OUT_DIR="Amino"
readonly EFETCH_URL="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

# Prints an error message to stderr and aborts the script.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Downloads protein $1 in FASTA format and writes its bare sequence to stdout.
# Returns non-zero when the download fails or no sequence line is received.
fetch_sequence() {
  local id=$1
  # -f: make curl fail on an HTTP error instead of emitting the error page,
  #     which would otherwise be counted as if it were sequence data.
  # grep: FASTA header lines start with '>'; drop them so only residues stay.
  # tr: strip line breaks and any other whitespace so wc counts residues only.
  curl -sf "${EFETCH_URL}?db=protein&rettype=fasta&id=${id}" \
    | grep -v '^>' \
    | tr -d '[:space:]'
}

[[ -r "$INPUT_FILE" ]] || die "cannot read ${INPUT_FILE}"

# -d '' makes read consume the whole file, splitting keys on any whitespace
# (spaces or newlines) without glob-expanding them. read reports non-zero on
# reaching end of file, which is expected here, hence the '|| true'.
ids=()
read -r -d '' -a ids < "$INPUT_FILE" || true
(( ${#ids[@]} > 0 )) || die "${INPUT_FILE} contains no NCBI keys"

# Create the working directory; -p keeps reruns from failing when it exists.
mkdir -p -- "$OUT_DIR"

# Download every sequence into its own intermediate file.
for id in "${ids[@]}"; do
  if ! fetch_sequence "$id" > "${OUT_DIR}/${id}.fasta"; then
    # Keep going: one bad key should not discard the remaining proteins.
    printf 'warning: no sequence retrieved for %s\n' "$id" >&2
  fi
done

# Count the characters in the intermediate files and write the final file.
wc -m -- "$OUT_DIR"/*.fasta > "${OUT_DIR}/conteo.txt"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment