Ian Sudbery IanSudbery

## pipeline.log
# 2019-06-03 10:02:34,173 INFO pipeline log is pipeline.log
# 2019-06-03 10:02:34,173 INFO output generated by ../devel/split_and_rev.py make reverse_sequence \
#                              job started at Mon Jun  3 10:02:34 2019 on node002.shef.ac.uk -- 1194b9b2-1cbc-4ab0-b71c-608b1d9c1469 \
#                              pid: 37815, system: Linux 3.10.0-957.12.2.el7.x86_64 #1 SMP Tue May 14 21:24:32 UTC 2019 x86_64
# 2019-06-03 10:02:34,173 INFO always_mount                            : False \
#                              cluster_memory_default                  : unlimited \
#                              cluster_memory_resource                 : None \
#                              cluster_num_jobs                        : None \
#                              cluster_options                         : None \
#                              cluster_parallel_environment            : None \

## seperate_logic.py
from ruffus import transform, suffix, pipeline_run
from task_functions import run_bwa, sort_bam

@transform("*.fastq", suffix(".fastq"), ".bam")
def step1(infile, outfile):
    """Pipeline task: produce a ``.bam`` file for every ``*.fastq`` input.

    The actual work is delegated to ``run_bwa`` (imported from
    ``task_functions``); this wrapper only registers it as a ruffus task.

    Args:
        infile: path of one file matched by the ``*.fastq`` glob.
        outfile: output path supplied by ruffus, i.e. the input name with
            its ``.fastq`` suffix replaced by ``.bam``.
    """
    run_bwa(infile, outfile)

@transform(step1, suffix(".bam"), ".sorted_bam")
def step2(infile, outfile):
    """Pipeline task: produce a ``.sorted_bam`` file for every ``.bam`` input.

    The task takes its inputs from the upstream pipeline task ``step1``
    (the task that emits the ``.bam`` files), not from the plain helper
    ``run_bwa``, which is not itself a ruffus task. The actual work is
    delegated to ``sort_bam`` (imported from ``task_functions``).

    Args:
        infile: path of one ``.bam`` file produced by ``step1``.
        outfile: output path supplied by ruffus, i.e. the input name with
            its ``.bam`` suffix replaced by ``.sorted_bam``.
    """
    sort_bam(infile, outfile)

## TranscriptCoordInterconverter.py
'''
Copyright 2018 Ian Sudbery

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

## calculate_effective_length.py
'''
calculate_effective_length.py - calculate mappability adjusted lengths
======================================================================

:Author:
:Tags: Python

Purpose
-------

## Find_detained_introns.R
# intron_meta is a dataframe with the columns: gene_id, intron_id, CHr, Start, End, Strand, Length and efflen
# intron_chunks is a dataframe with a column for Geneid-Chr-Start-End-Strand-Length (as output by featureCounts) and other
#              columns are the counts in each sample.
# They have the same order.

# Packages used by this script.
library(dplyr)
library(tidyr)

# Per-intron weight: the square root of the efflen column.
intron_meta$weight <- sqrt(intron_meta$efflen)

# Within each gene, divide every weight by the gene's total weight so that
# norm_weight sums to 1 per gene_id; drop the grouping afterwards.
intron_meta <- intron_meta %>%
  group_by(gene_id) %>%
  mutate(norm_weight = weight / sum(weight)) %>%
  ungroup()

## test_threaded_drmaa.py
import drmaa
from multiprocessing.pool import ThreadPool
import tempfile
import os
import stat

# Thread pool with two worker threads.
# NOTE(review): presumably used to drive DRMAA calls from several threads at
# once (the file is named test_threaded_drmaa.py) -- confirm against the rest
# of the script, which is not visible here.
pool = ThreadPool(2)

# Create the DRMAA session object and initialize it before any further use.
session = drmaa.Session()
session.initialize()
	# 2019-06-03 10:02:34,173 INFO pipeline log is pipeline.log
	# 2019-06-03 10:02:34,173 INFO output generated by ../devel/split_and_rev.py make reverse_sequence \
	# job started at Mon Jun 3 10:02:34 2019 on node002.shef.ac.uk -- 1194b9b2-1cbc-4ab0-b71c-608b1d9c1469 \
	# pid: 37815, system: Linux 3.10.0-957.12.2.el7.x86_64 #1 SMP Tue May 14 21:24:32 UTC 2019 x86_64
	# 2019-06-03 10:02:34,173 INFO always_mount : False \
	# cluster_memory_default : unlimited \
	# cluster_memory_resource : None \
	# cluster_num_jobs : None \
	# cluster_options : None \
	# cluster_parallel_environment : None \
	from ruffus import transform, suffix, pipeline_run
	from task_functions import run_bwa, sort_bam

	@transform("*.fastq", suffix(".fastq"), ".bam")
	def step1(infile, outfile):
	    """Pipeline task: produce a ``.bam`` file for every ``*.fastq`` input.

	    Delegates the actual work to ``run_bwa`` from ``task_functions``.

	    Args:
	        infile: path of one file matched by the ``*.fastq`` glob.
	        outfile: output path supplied by ruffus (``.fastq`` -> ``.bam``).
	    """
	    run_bwa(infile, outfile)

	@transform(step1, suffix(".bam"), ".sorted_bam")
	def step2(infile, outfile):
	    """Pipeline task: produce a ``.sorted_bam`` file for every ``.bam`` input.

	    Inputs come from the upstream pipeline task ``step1`` rather than the
	    plain helper ``run_bwa``, which is not itself a ruffus task. Delegates
	    the actual work to ``sort_bam`` from ``task_functions``.

	    Args:
	        infile: path of one ``.bam`` file produced by ``step1``.
	        outfile: output path supplied by ruffus (``.bam`` -> ``.sorted_bam``).
	    """
	    sort_bam(infile, outfile)
	'''
	Copyright 2018 Ian Sudbery

	Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
	files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
	modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
	Software is furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
	'''
	calculate_effective_length.py - calculate mappability adjusted lengths
	======================================================================

	:Author:
	:Tags: Python

	Purpose
	-------
	# intron_meta is a dataframe with the columns: gene_id, intron_id, CHr, Start, End, Strand, Length and efflen
	# intron_chunks is a dataframe with a column for Geneid-Chr-Start-End-Strand-Length (as output by featureCounts) and other
	# columns are the counts in each sample.
	# They have the same order.

	# Packages used by this script.
	library(dplyr)
	library(tidyr)

	# Per-intron weight: the square root of the efflen column.
	intron_meta$weight <- sqrt(intron_meta$efflen)

	# Within each gene, divide every weight by the gene's total weight so that
	# norm_weight sums to 1 per gene_id; drop the grouping afterwards.
	intron_meta <- intron_meta %>%
	  group_by(gene_id) %>%
	  mutate(norm_weight = weight / sum(weight)) %>%
	  ungroup()
	import drmaa
	from multiprocessing.pool import ThreadPool
	import tempfile
	import os
	import stat

	# Thread pool with two worker threads.
	# NOTE(review): presumably used to drive DRMAA calls from several threads
	# at once -- confirm against the rest of the script, not visible here.
	pool = ThreadPool(2)

	# Create the DRMAA session object and initialize it before any further use.
	session = drmaa.Session()
	session.initialize()