Radhouane Aniba radaniba

## parsehtmltable.py
# Download one EBI NCBI-BLAST result page and parse its HTML with BeautifulSoup.
# NOTE(review): `from BeautifulSoup import ...`, `urllib.urlopen` and the
# `print` statements below are Python 2 / BeautifulSoup 3 spellings --
# presumably this script targets Python 2; confirm before running.
from BeautifulSoup import BeautifulSoup
import urllib
import xlwt
# Create an Excel workbook with a single sheet. Nothing is written to it in
# the part of the script visible here.
wb = xlwt.Workbook()
ws = wb.add_sheet('a test sheet')
# Fetch the result page; the BLAST job id is hard-coded in the URL.
f = urllib.urlopen("http://www.ebi.ac.uk/Tools/services/web/blastresult.ebi?tool=ncbiblast&jobId=ncbiblast-I20120714-161017-0108-80986175-pg&context=nucleotide")
html = f.read()
# Build the parse tree from the downloaded markup.
soup = BeautifulSoup(html)
# Debugging output, left disabled:
#print soup.prettify()
#print soup

## readspreadsheet
### IMPORTS

require 'roo'
require 'pp'

### IMPLEMENTATION ###

# An Excel spreadsheet reader that can clean up column names and convert data.
#
# Assumptions: The data is read off the first sheet of the workbook. The sheet

## genomeanalysistasks
###### 1. explore sequence composition of human genome
library(BSgenome)
available.genomes()
library(BSgenome.Hsapiens.UCSC.hg18)

### get sequence for chromosome 1
Seq=Hsapiens[["chr1"]]
Seq # shows some summaries
Seq=unmasked(Seq)  ## remove the mask


## biblio.pl
#!perl -w
#
#   A client showing how to use Bio::Biblio module, a module for
#   accessing and querying a bibliographic repository.
#   It also shows how to use the modules Bio::Biblio::IO::medlinexml and
#   Bio::Biblio::IO::medline2ref, which convert XML MEDLINE
#   citations into a simple hash table and into full Perl objects.
#
#   It has many options in order to cover as many methods as
#   possible.  Because of that, it can be also used as a fully

## add-accession-number.pl
#5. Add accession numbers and sequences to the tree -- now we're using PhyloXML's extra features.

from Bio.Phylo import PhyloXML

# Make a lookup table for sequences
lookup = dict((rec.id, str(rec.seq)) for rec in best_seqs)

for clade in egfr_tree.get_terminals():
    key = clade.name
    accession = PhyloXML.Accession(key, 'NCBI')

## extract-oligos.pl
#!/usr/bin/perl -w

# oligos.pl
# Create and analyze an overlapping series of oligos
# WI Bioinformatics course - Feb 2002 - Lecture 5
# WI Bioinformatics course - Revised - Sep 2003
# Example of input taken as multiple arguments

# Check input and give info if arguments are missing
if (! $ARGV[3])

## parsegenbank.pl
#! /usr/local/bin/perl -w

# Homemade Genbank report parser using regular expressions.
# Once desired data is captured, it can be printed in any format.
# WI Bioinformatics course - Feb 2002 - Lecture 6

$gb_report = "genbank_sample.txt";

open (GB, $gb_report) || die "cannot open $gb_report for reading: $!";

## parseblast.pl
#!/usr/local/bin/perl -w

# Parsing BLAST reports with BioPerl's Bio::Tools::BPlite module
# WI Bioinformatics course - Feb 2002 - Lecture 6

# See documentation at http://www.bioperl.org/Core/POD/Bio/Tools/BPlite.html

use Bio::Tools::BPlite;

# Prompt the user for the file name if it's not an argument

## runpatscan.pl
#!/usr/bin/perl -w

# patscan_batch.pl
# Run patscan on all seqs in a folder
# Can be easily modified to run any command on every sequence in a folder
# WI Bioinformatics course - Feb 2002 - Lecture 5
# Revised - Sep 2003
################  User-supplied variables  #############

# Directory of sequences

## geneannotate.py
#If you deal with a large quantity of gene IDs (such as the ones produced by microarray analysis), annotating them is important if you want to determine their potential biological meaning. However, a lot of annotation systems are only web-based, or do not work with Python.

#Thanks to the Entrez module it is possible to annotate batches of Entrez Gene IDs without trouble, using the "retrieve_ids" function provided below.

#This example assumes you have a list of Entrez Gene IDs. Note: they should be stored as strings, rather than integers, even if they are numbers.

import sys

from Bio import Entrez
	from BeautifulSoup import BeautifulSoup
	import urllib
	import xlwt
	wb = xlwt.Workbook()
	ws = wb.add_sheet('a test sheet')
	f = urllib.urlopen("http://www.ebi.ac.uk/Tools/services/web/blastresult.ebi?tool=ncbiblast&jobId=ncbiblast-I20120714-161017-0108-80986175-pg&context=nucleotide")
	html = f.read()
	soup = BeautifulSoup(html)
	#print soup.prettify()
	#print soup
	### IMPORTS

	require 'roo'
	require 'pp'

	### IMPLEMENTATION ###

	# An Excel spreadsheet reader that can clean up column names and convert data.
	#
	# Assumptions: The data is read off the first sheet of the workbook. The sheet
	###### 1. explore sequence composition of human genome
	library(BSgenome)
	available.genomes()
	library(BSgenome.Hsapiens.UCSC.hg18)

	### get sequence for chromosome 1
	Seq=Hsapiens[["chr1"]]
	Seq # shows some summaries
	Seq=unmasked(Seq) ## remove the mask
	#!perl -w
	#
	# A client showing how to use Bio::Biblio module, a module for
	# accessing and querying a bibliographic repository.
	# It also shows how to use the modules Bio::Biblio::IO::medlinexml and
	# Bio::Biblio::IO::medline2ref, which convert XML MEDLINE
	# citations into a simple hash table and into full Perl objects.
	#
	# It has many options in order to cover as many methods as
	# possible. Because of that, it can be also used as a fully
	#5. Add accession numbers and sequences to the tree -- now we're using PhyloXML's extra features.

	from Bio.Phylo import PhyloXML

	# Make a lookup table for sequences
	lookup = dict((rec.id, str(rec.seq)) for rec in best_seqs)

	# Build a PhyloXML accession from the name of every terminal clade.
	# The loop body must be nested under the `for` header; the flattened
	# indentation made the loop syntactically invalid.
	for clade in egfr_tree.get_terminals():
		key = clade.name
		accession = PhyloXML.Accession(key, 'NCBI')
	#!/usr/bin/perl -w

	# oligos.pl
	# Create and analyze an overlapping series of oligos
	# WI Bioinformatics course - Feb 2002 - Lecture 5
	# WI Bioinformatics course - Revised - Sep 2003
	# Example of input taken as multiple arguments

	# Check input and give info if arguments are missing
	if (! $ARGV[3])
	#! /usr/local/bin/perl -w

	# Homemade Genbank report parser using regular expressions.
	# Once desired data is captured, it can be printed in any format.
	# WI Bioinformatics course - Feb 2002 - Lecture 6

	# Path of the GenBank report to parse.
	$gb_report = "genbank_sample.txt";

	# Open the report for reading; abort with the system error ($!) on failure.
	# The operator is a plain `||` -- the backslash-escaped form was a markup
	# artifact and is not valid Perl here.
	open (GB, $gb_report) || die "cannot open $gb_report for reading: $!";
	#!/usr/local/bin/perl -w

	# Parsing BLAST reports with BioPerl's Bio::Tools::BPlite module
	# WI Bioinformatics course - Feb 2002 - Lecture 6

	# See documentation at http://www.bioperl.org/Core/POD/Bio/Tools/BPlite.html

	use Bio::Tools::BPlite;

	# Prompt the user for the file name if it's not an argument
	#!/usr/bin/perl -w

	# patscan_batch.pl
	# Run patscan on all seqs in a folder
	# Can be easily modified to run any command on every sequence in a folder
	# WI Bioinformatics course - Feb 2002 - Lecture 5
	# Revised - Sep 2003
	################ User-supplied variables #############

	# Directory of sequences
	#If you deal with a large quantity of gene IDs (such as the ones produced by microarray analysis), annotating them is important if you want to determine their potential biological meaning. However, a lot of annotation systems are only web-based, or do not work with Python.

	#Thanks to the Entrez module it is possible to annotate batches of Entrez Gene IDs without trouble, using the "retrieve_ids" function provided below.

	#This example assumes you have a list of Entrez Gene IDs. Note: they should be stored as strings, rather than integers, even if they are numbers.

	import sys

	from Bio import Entrez