radaniba/report_generator.py

## report_generator.py
"""
This is a small Python script that generates a report from multiple Bismark analyses.
It creates a single document (the report) as a table:
rows are the parsed features,
columns are the analyzed samples.

The script takes as arguments:
    - a path to a directory containing several individual Bismark results
    - an output filename (-o/--output)
    - a file listing the features to extract (-f/--features)

"""
from __future__ import division

__author__ = "Rad <aradwen@gmail.com>"
__license__ = "GNU General Public License version 3"
__date__ = "06/30/2015"
__version__ = "0.1"

try:
    import os
    import pandas as pd
    import datetime
    import logging as log
    import subprocess
    from prettytable import PrettyTable
    from argparse import ArgumentParser
except ImportError:
    # Checks the installation of the necessary python modules
    import os
    import sys

    print((os.linesep * 2).join(
        ["An error found importing one module:", str(sys.exc_info()[1]), "You need to install it Stopping..."]))
    sys.exit(-2)


def init_log():
    """Return this module's logger, writing to ``report_generator.log``.

    The logger is named after the module and set to INFO.  The file handler
    (``report_generator.log`` in the current working directory) is attached
    only once, so calling this function repeatedly does not make every
    message appear several times in the log file.

    :return: the configured ``logging.Logger`` instance
    """
    logger = log.getLogger(__name__)
    logger.setLevel(log.INFO)
    # getLogger() returns the same object on every call; attach a file
    # handler only if an earlier call has not already done so.
    if not any(isinstance(h, log.FileHandler) for h in logger.handlers):
        handler = log.FileHandler('report_generator.log')
        handler.setLevel(log.INFO)
        # timestamp - logger name - level - message
        formatter = log.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger

def arguments():
    """Build the command line parser of the report generator.

    :return: an ``ArgumentParser`` with the optional positional ``path`` and
             the ``-o/--output``, ``-f/--features`` and ``-v/--version`` options
    """
    home_dir = os.path.expanduser('~')
    version_text = "%(prog)s {0}".format(__version__)

    cli = ArgumentParser(description="""Generates a report from different bismark outputs """)
    # The results directory is optional and falls back to the user's home.
    cli.add_argument(
        "path",
        nargs='?',
        default=home_dir,
        help="The path containing different results from bismark")
    cli.add_argument(
        "-o", "--output",
        default="report.summary",
        help="report summarizing results across data")
    cli.add_argument(
        "-f", "--features",
        help="List of features to be extracted")
    cli.add_argument(
        "-v", "--version",
        action="version",
        version=version_text,
        help="show program's version number and exit")
    return cli


class Report(object):
    """Tab-separated summary of selected features across bismark outputs.

    Attributes (all default to ``None``; they may be passed as keyword
    arguments or assigned after construction):
        path: directory scanned for ``*.txt`` bismark result files
        list_of_features: path of a text file naming one feature per line
        output: path of the report file to write
    """

    def __init__(self, **kwargs):
        self.path = None
        self.list_of_features = None
        self.output = None
        # Keywords that do not match an existing attribute are ignored.
        # items() (instead of the Python-2-only iteritems()) keeps the
        # constructor usable on both Python 2 and Python 3.
        for key, value in kwargs.items():
            if hasattr(self, key):
                setattr(self, key, value)

    def load_features(self):
        """Read the feature names from ``self.list_of_features``.

        Lines starting with whitespace (blank lines included) are skipped.

        :return: A Dataframe with a single column called features
        """
        # The context manager closes the file even if reading fails.
        with open(self.list_of_features) as feature_file:
            features = [line.rstrip('\r\n') for line in feature_file
                        if not line[0].isspace()]
        return pd.DataFrame({'features': features})

    def extract_feature(self, draft_report, sample_bismark_output):
        """Add one column holding this sample's values to the report.

        The sample file is read once.  For every ``key:value`` line the text
        between the first and the second colon is kept as the value, and the
        first occurrence of a key wins.  A feature that the sample does not
        contain yields an empty string, so the new column always has exactly
        one entry per feature.

        :param draft_report: the dataframe being built; it is modified in
                             place by adding a column named after the sample
        :param sample_bismark_output: the bismark output for a specific sample
        :return: dataframe populated with samples
        """
        sample_name = os.path.basename(sample_bismark_output).split(".txt")[0]
        values = {}
        with open(sample_bismark_output) as input_file:
            for line in input_file:
                fields = line.split(":")
                if len(fields) > 1:
                    # setdefault keeps the first value seen for a key.
                    values.setdefault(fields[0], fields[1].rstrip("\r\n"))
        draft_report[sample_name] = [values.get(feature, "")
                                     for feature in draft_report['features']]
        return draft_report

    def list_files(self):
        """Return the ``.txt`` files found directly inside ``self.path``.

        :return: list of full paths, sorted by file name so that the column
                 order of the report is reproducible between runs
        """
        candidates = (os.path.join(self.path, name)
                      for name in sorted(os.listdir(self.path))
                      if name.endswith(".txt"))
        return [full_path for full_path in candidates if os.path.isfile(full_path)]

    def generate(self):
        """Build the report and write it to ``self.output``.

        The output is tab-separated, UTF-8 encoded, without an index column.
        When no sample file is found only the features column is written.
        """
        report = self.load_features()
        for sample in self.list_files():
            report = self.extract_feature(report, sample)
        report.to_csv(self.output, index=False, sep='\t', encoding='utf-8')


def main():
    """Parse the command line, generate the report and log the progress.

    Exits with a usage error when no features file is given, because the
    rows of the report are defined by that file.  Any failure while the
    report is generated is written to the log and then re-raised.
    """
    parser = arguments()
    args = parser.parse_args()
    if args.features is None:
        # Without a features file generate() has nothing to open; stop with
        # a usage message instead of failing later with a TypeError.
        parser.error("a features file is required (use -f/--features)")

    # Named 'logger' so the module-level 'log' alias is not shadowed.
    logger = init_log()
    logger.info("Starting the report generator...")
    logger.info("Loading features from %s", args.features)
    logger.info("Loading bismark results from  %s", args.path)
    report = Report(path=args.path, list_of_features=args.features, output=args.output)

    logger.info("Started Generating report ..  ")
    try:
        report.generate()
    except Exception:
        # Keep the traceback in the log file, then let the error propagate.
        logger.exception("Report generation failed")
        raise
    logger.info("Finished Generating report ..  ")
    logger.info("Report saved to %s", args.output)

# Run the report generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
	"""
	This is a small Python script that generates a report from multiple Bismark analyses.
	It creates a single document (the report) as a table:
	rows are the parsed features,
	columns are the analyzed samples.

	The script takes as arguments:
	- a path to a directory containing several individual Bismark results
	- an output filename (-o/--output)
	- a file listing the features to extract (-f/--features)

	"""
	from __future__ import division

	__author__ = "Rad <aradwen@gmail.com>"
	__license__ = "GNU General Public License version 3"
	__date__ = "06/30/2015"
	__version__ = "0.1"

	try:
	import os
	import pandas as pd
	import datetime
	import logging as log
	import subprocess
	from prettytable import PrettyTable
	from argparse import ArgumentParser
	except ImportError:
	# Checks the installation of the necessary python modules
	import os
	import sys

	print((os.linesep * 2).join(
	["An error found importing one module:", str(sys.exc_info()[1]), "You need to install it Stopping..."]))
	sys.exit(-2)


	def init_log():
	    """Return this module's logger, writing to ``report_generator.log``.

	    The logger is named after the module and set to INFO.  The file handler
	    (``report_generator.log`` in the current working directory) is attached
	    only once, so calling this function repeatedly does not make every
	    message appear several times in the log file.

	    :return: the configured ``logging.Logger`` instance
	    """
	    logger = log.getLogger(__name__)
	    logger.setLevel(log.INFO)
	    # getLogger() returns the same object on every call; attach a file
	    # handler only if an earlier call has not already done so.
	    if not any(isinstance(h, log.FileHandler) for h in logger.handlers):
	        handler = log.FileHandler('report_generator.log')
	        handler.setLevel(log.INFO)
	        # timestamp - logger name - level - message
	        formatter = log.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
	        handler.setFormatter(formatter)
	        logger.addHandler(handler)
	    return logger

	def arguments():
	    """Defines the command line arguments for the script.

	    :return: an ``ArgumentParser`` with the optional positional ``path`` and
	             the ``-o/--output``, ``-f/--features`` and ``-v/--version`` options
	    """
	    main_desc = """Generates a report from different bismark outputs """

	    parser = ArgumentParser(description=main_desc)
	    # The results directory is optional and falls back to the user's home.
	    parser.add_argument("path", default=os.path.expanduser('~'), nargs='?',
	                        help="The path containing different results from bismark")
	    parser.add_argument("-o", "--output", default="report.summary",
	                        help="report summarizing results across data")
	    parser.add_argument("-f", "--features", help="List of features to be extracted")
	    parser.add_argument("-v", "--version", action="version",
	                        version="%(prog)s {0}".format(__version__),
	                        help="show program's version number and exit")
	    return parser


	class Report(object):
	    """Tab-separated summary of selected features across bismark outputs.

	    Attributes (all default to ``None``; they may be passed as keyword
	    arguments or assigned after construction):
	        path: directory scanned for ``*.txt`` bismark result files
	        list_of_features: path of a text file naming one feature per line
	        output: path of the report file to write
	    """

	    def __init__(self, **kwargs):
	        self.path = None
	        self.list_of_features = None
	        self.output = None
	        # Keywords that do not match an existing attribute are ignored.
	        # items() (instead of the Python-2-only iteritems()) keeps the
	        # constructor usable on both Python 2 and Python 3.
	        for key, value in kwargs.items():
	            if hasattr(self, key):
	                setattr(self, key, value)

	    def load_features(self):
	        """Read the feature names from ``self.list_of_features``.

	        Lines starting with whitespace (blank lines included) are skipped.

	        :return: A Dataframe with a single column called features
	        """
	        # The context manager closes the file even if reading fails.
	        with open(self.list_of_features) as feature_file:
	            features = [line.rstrip('\r\n') for line in feature_file
	                        if not line[0].isspace()]
	        return pd.DataFrame({'features': features})

	    def extract_feature(self, draft_report, sample_bismark_output):
	        """Add one column holding this sample's values to the report.

	        The sample file is read once.  For every ``key:value`` line the text
	        between the first and the second colon is kept as the value, and the
	        first occurrence of a key wins.  A feature that the sample does not
	        contain yields an empty string, so the new column always has exactly
	        one entry per feature.

	        :param draft_report: the dataframe being built; it is modified in
	                             place by adding a column named after the sample
	        :param sample_bismark_output: the bismark output for a specific sample
	        :return: dataframe populated with samples
	        """
	        sample_name = os.path.basename(sample_bismark_output).split(".txt")[0]
	        values = {}
	        with open(sample_bismark_output) as input_file:
	            for line in input_file:
	                fields = line.split(":")
	                if len(fields) > 1:
	                    # setdefault keeps the first value seen for a key.
	                    values.setdefault(fields[0], fields[1].rstrip("\r\n"))
	        draft_report[sample_name] = [values.get(feature, "")
	                                     for feature in draft_report['features']]
	        return draft_report

	    def list_files(self):
	        """Return the ``.txt`` files found directly inside ``self.path``.

	        :return: list of full paths, sorted by file name so that the column
	                 order of the report is reproducible between runs
	        """
	        candidates = (os.path.join(self.path, name)
	                      for name in sorted(os.listdir(self.path))
	                      if name.endswith(".txt"))
	        return [full_path for full_path in candidates if os.path.isfile(full_path)]

	    def generate(self):
	        """Build the report and write it to ``self.output``.

	        The output is tab-separated, UTF-8 encoded, without an index column.
	        When no sample file is found only the features column is written.
	        """
	        report = self.load_features()
	        for sample in self.list_files():
	            report = self.extract_feature(report, sample)
	        report.to_csv(self.output, index=False, sep='\t', encoding='utf-8')


	def main():
	    """Parse the command line, generate the report and log the progress.

	    Exits with a usage error when no features file is given, because the
	    rows of the report are defined by that file.  Any failure while the
	    report is generated is written to the log and then re-raised.
	    """
	    parser = arguments()
	    args = parser.parse_args()
	    if args.features is None:
	        # Without a features file generate() has nothing to open; stop with
	        # a usage message instead of failing later with a TypeError.
	        parser.error("a features file is required (use -f/--features)")

	    # Named 'logger' so the module-level 'log' alias is not shadowed.
	    logger = init_log()
	    logger.info("Starting the report generator...")
	    logger.info("Loading features from %s", args.features)
	    logger.info("Loading bismark results from %s", args.path)
	    report = Report(path=args.path, list_of_features=args.features, output=args.output)

	    logger.info("Started Generating report .. ")
	    try:
	        report.generate()
	    except Exception:
	        # Keep the traceback in the log file, then let the error propagate.
	        logger.exception("Report generation failed")
	        raise
	    logger.info("Finished Generating report .. ")
	    logger.info("Report saved to %s", args.output)

	# Run the report generator only when this file is executed as a script.
	if __name__ == "__main__":
	    main()