Wouter De Coster wdecoster

## .bashrc
# ~/.bashrc: executed by bash(1) for non-login shells.
# see /usr/share/doc/bash/examples/startup-files (in the package bash-doc)
# for examples

# If not running interactively, don't do anything
# `$-` expands to the current shell's option flags; the pattern below looks
# for the letter `i` anywhere in that string.
case $- in
    *i*) ;;        # flag string contains `i`: fall through and keep reading this file
      *) return;;  # otherwise stop processing this file here
esac

## varplot.py
from cyvcf2 import VCF
import pandas as pd
import plotly.graph_objects as go
import plotly
import sys

def main():
    vcf = VCF(sys.argv[1])
    filter_counts = pd.DataFrame.from_records([list(v.gt_types) + [v.FILTER] for v in vcf], columns = vcf.samples + ['filter']) \
     .apply(lambda x: is_variant(x), axis=1, result_type='expand') \

## per_base_quality.py
from Bio import SeqIO
import seaborn as sns
from collections import defaultdict
import gzip
import sys

# Maps each sequence letter to the list of Phred quality scores observed for
# it across every record of the gzipped FASTQ file given as the first
# command-line argument.
q_per_base = defaultdict(list)
# Use a context manager so the gzip handle is closed as soon as parsing is
# done (the original left the handle open for the lifetime of the process).
with gzip.open(sys.argv[1], 'rt') as fastq_handle:
    for record in SeqIO.parse(fastq_handle, "fastq"):
        qualities = record.letter_annotations["phred_quality"]
        # Pair every letter of the read with the quality score at that position.
        for n, q in zip(record.seq, qualities):
            q_per_base[n].append(q)

## get_large_insertions.py
import pysam
from argparse import ArgumentParser


def main():
    # Script entry point. NOTE(review): only the opening statements are
    # visible in this excerpt; the function presumably continues — confirm
    # against the full file.
    args = get_args()
    # Open the alignment file named by the `bam` argument.
    samfile = pysam.AlignmentFile(args.bam)
    # Call get_long_ins on the CIGAR tuples and query sequence of every
    # fetched read, skipping reads whose query_sequence is empty or None.
    ins = [get_long_ins(read.cigartuples, read.query_sequence)
           for read in samfile.fetch() if read.query_sequence]
    # NOTE(review): `head` is not used in the visible lines — presumably a
    # counter consumed further down; verify.
    head = 1

## confusion_matrix.py
from argparse import ArgumentParser
from cyvcf2 import VCF
import pandas as pd


def main():
    """Read the command-line arguments and hand the `vcf` value to confusion_matrix."""
    vcf_argument = get_args().vcf
    confusion_matrix(vcf_argument)


## 356286.py
from argparse import ArgumentParser
from Bio import SeqIO
from collections import Counter, defaultdict
import gzip


def main():
    args = get_args()
    primers = defaultdict(int)
    for pattern in args.pattern:

## pid_over_time.py
import pickle
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from math import ceil

# NOTE(review): unpickling can execute arbitrary code — only load pickle
# files that come from a trusted source.
# Context managers make sure both file handles are closed after loading
# (the original passed bare open() calls and never closed them).
with open("alignment.pickle", "rb") as alignment_handle:
    df_al = pickle.load(alignment_handle)
with open("summary.pickle", "rb") as summary_handle:
    df_sum = pickle.load(summary_handle)

# Keep only rows whose "lengths" value exceeds 200, reduce them to the
# "readIDs" and "percentIdentity" columns, and index the result by read ID.
# Assumes df_al is a pandas DataFrame — TODO confirm against the pickle producer.
long_alignments = df_al[df_al["lengths"] > 200]
df_al_sel = long_alignments[["readIDs", "percentIdentity"]].set_index('readIDs')

## yield_vs_N50.py
import pandas as pd
import matplotlib.pyplot as plt


# Load the tab-separated table; the "yield" and "N50" columns are used below.
df = pd.read_csv("yield_vs_N50.txt", sep="\t")
# Rescale the yield column by 1e9 (presumably bases -> gigabases; confirm units).
# Plain column assignment replaces the column instead of writing floats
# in place through `.loc[:, ...]`, which emits an incompatible-dtype
# FutureWarning on recent pandas when the column was parsed as integers.
df["yield"] = df["yield"] / 1e9
# Scatter plot of N50 against the rescaled yield, shown in an interactive window.
df.plot(x="yield", y="N50", kind="scatter")
plt.show()

## fisher_exact.py
from scipy import stats
# Fisher's exact test on a 2x2 contingency table (rows [96, 4] and [82, 18]);
# unpacks the odds ratio and the two-sided p-value.
OR, pvalue = stats.fisher_exact(
    table=[
        [96, 4],
        [82, 18],
    ],
    alternative="two-sided",
)

## waffle_code.py
import matplotlib.pyplot as plt
from pywaffle import Waffle

fig = plt.figure(
    FigureClass=Waffle,
    rows=5,
    values={'AA': 82, 'AT': 18},
    title={'label': 'Genotypes of patients', 'loc': 'center'},
    legend={'loc': 'lower left', 'bbox_to_anchor': (0, -0.4), 'ncol': 2, 'framealpha': 0},
    icons='male', icon_size=18,
	# ~/.bashrc: executed by bash(1) for non-login shells.
	# see /usr/share/doc/bash/examples/startup-files (in the package bash-doc)
	# for examples

	# If not running interactively, don't do anything
	case $- in
	i) ;;
	*) return;;
	esac
	from cyvcf2 import VCF
	import pandas as pd
	import plotly.graph_objects as go
	import plotly
	import sys

	def main():
	vcf = VCF(sys.argv[1])
	filter_counts = pd.DataFrame.from_records([list(v.gt_types) + [v.FILTER] for v in vcf], columns = vcf.samples + ['filter']) \
	.apply(lambda x: is_variant(x), axis=1, result_type='expand') \
	from Bio import SeqIO
	import seaborn as sns
	from collections import defaultdict
	import gzip
	import sys

	q_per_base = defaultdict(list)
	for record in SeqIO.parse(gzip.open(sys.argv[1], 'rt'), "fastq"):
	for n, q in zip(record.seq, record.letter_annotations["phred_quality"]):
	q_per_base[n].append(q)
	import pysam
	from argparse import ArgumentParser


	def main():
	args = get_args()
	samfile = pysam.AlignmentFile(args.bam)
	ins = [get_long_ins(read.cigartuples, read.query_sequence)
	for read in samfile.fetch() if read.query_sequence]
	head = 1
	from argparse import ArgumentParser
	from Bio import SeqIO
	from collections import Counter, defaultdict
	import gzip


	def main():
	args = get_args()
	primers = defaultdict(int)
	for pattern in args.pattern:
	import pickle
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	from math import ceil

	df_al = pickle.load(open("alignment.pickle", "rb"))
	df_sum = pickle.load(open("summary.pickle", "rb"))

	df_al_sel = df_al[df_al["lengths"] > 200][["readIDs", "percentIdentity"]].set_index('readIDs')
	import pandas as pd
	import matplotlib.pyplot as plt


	df = pd.read_csv("yield_vs_N50.txt", sep="\t")
	df.loc[:, "yield"] = df.loc[:, "yield"] / 1e9
	df.plot(x="yield", y="N50", kind="scatter")
	plt.show()
	from scipy import stats
	OR, pvalue = stats.fisher_exact([[96, 4], [82, 18]])
	import matplotlib.pyplot as plt
	from pywaffle import Waffle

	fig = plt.figure(
	FigureClass=Waffle,
	rows=5,
	values={'AA': 82, 'AT': 18},
	title={'label': 'Genotypes of patients', 'loc': 'center'},
	legend={'loc': 'lower left', 'bbox_to_anchor': (0, -0.4), 'ncol': 2, 'framealpha': 0},
	icons='male', icon_size=18,