Skip to content

Instantly share code, notes, and snippets.

View mdshw5's full-sized avatar

Matt Shirley mdshw5

View GitHub Profile
@mdshw5
mdshw5 / answer.py
Created July 27, 2017 03:16
biostars 264821
import simplesam
# Build a lookup of read ID -> (UMI, barcode) from a delimited text file.
# NOTE(review): the file name suggests the column order read_id, barcode, umi,
# while the unpacking below assumes read_id, umi, barcode -- confirm against
# the actual file.
barcodes = {}
with open('read_id_barcode_umi.txt') as barcodes_file:
    for line in barcodes_file:
        # should check the delimiter in this file. If it's ' ' or \t or ','
        # (split() with no argument only splits on whitespace, not on ',')
        read_id, umi, barcode = line.rstrip().split()
        # Key the `barcodes` dict, not the just-unpacked `barcode` string:
        # a str does not support item assignment and would raise TypeError.
        barcodes[read_id] = (umi, barcode)
# reading this entire file could use a TON of memory
# if you have lots of reads
@mdshw5
mdshw5 / answer.py
Created July 19, 2017 14:27
biostars 263478
from pyfaidx import Fasta
# For every "<chrom> <pos>" line in pos.txt, print the chromosome, the position
# and the nucleotide found at that index of input.fasta, tab-separated.
with Fasta('input.fasta') as fasta, open('pos.txt', 'r') as nucleotides:
    for row in nucleotides:
        chrom, pos = row.rstrip().split()
        # NOTE(review): integer indexing is presumably Python-style (0-based);
        # confirm that the positions in pos.txt use the same convention.
        base = fasta[chrom][int(pos)].seq
        print("{chrom}\t{pos}\t{nuc}".format(chrom=chrom, pos=pos, nuc=base))
@mdshw5
mdshw5 / answer.py
Last active July 14, 2017 17:48
biostars 262660
from pyfaidx import Fasta
# For each three-column line of file_1 (ignored field, EC string, output file
# name), write every record of file_2.fasta whose name contains the EC string
# to the named output file.
with Fasta("file_2.fasta") as records, open("file_1") as content:
    for row in content:
        _, ec, filename = row.rstrip().split()
        with open(filename, "w") as out_file:
            # The whole FASTA is scanned again for every input line.
            # NOTE(review): repr(record) is what gets written -- confirm it
            # yields FASTA-formatted text for the record type in use.
            out_file.writelines(
                repr(record) for record in records if ec in record.name
            )
@mdshw5
mdshw5 / default.grub
Created November 13, 2016 03:29
grub config
# Kernel command-line arguments GRUB adds to the default boot entry.
# NOTE(review): rootflags=degraded,subvol=@ looks like Btrfs mount options, and
# the intel_iommu / vfio_iommu_type1 / pcie_acs_override settings look like a
# VFIO PCI-passthrough setup -- confirm against the kernel actually in use
# (pcie_acs_override presumably needs a kernel carrying the ACS override patch).
GRUB_CMDLINE_LINUX_DEFAULT="rootflags=degraded,subvol=@ intel_iommu=on,igfx_off vfio_iommu_type1.allow_unsafe_interrupts=1 pcie_acs_override=downstream"
@mdshw5
mdshw5 / Steam.xml
Created November 13, 2016 01:45
Steam KVM virtual machine for NVIDIA GeForce GTX 950 passthrough
<domain type='kvm'>
<name>Steam</name>
<uuid>90325573-ce4b-4ffc-875e-ca31f2d2f859</uuid>
<memory unit='KiB'>2097152</memory>
<currentMemory unit='KiB'>2097152</currentMemory>
<vcpu placement='static'>4</vcpu>
<os>
<type arch='x86_64' machine='pc-q35-2.5'>hvm</type>
<loader readonly='yes' type='pflash'>/usr/share/OVMF/OVMF_CODE.fd</loader>
<nvram>/var/lib/libvirt/qemu/nvram/Steam_VARS.fd</nvram>
@mdshw5
mdshw5 / rule.py
Created October 10, 2016 17:47
canvas manifest creation
## This is a rule for use in Snakemake
rule create_canvas_xml:
input: fasta=config["mouse_fasta"]
output: xml="GenomeSize.xml", genome="genome.fa"
params: runtime="7200", memory="2G"
run:
from pyfaidx import Fa
@mdshw5
mdshw5 / answer.py
Created July 30, 2016 17:51
biostars 204336
from pyfaidx import Fasta
# Interleave the records of two FASTA files into result.fa: first record of
# 1st.fa, first record of 2nd.fa, then the second record of each, and so on.
with Fasta('1st.fa') as first, Fasta('2nd.fa') as second, open('result.fa', 'w') as result:
    # zip() stops at the shorter input, so unpaired trailing records of the
    # longer file are not written.
    for a, b in zip(first, second):
        for record in (a, b):
            # The header and the sequence each need their own line terminator;
            # without them all records run together on a single line and the
            # output is not valid FASTA.
            result.write('>' + record.name + '\n')
            result.write(str(record) + '\n')
@mdshw5
mdshw5 / example.py
Last active July 25, 2016 15:42
biostars 203117
from pyfaidx import FastaVariant
import vcf
# Create one <sample>.fasta file per sample listed in calls.vcf.gz, built from
# the consensus of reference.fasta with that sample's het and hom variants.

# Use a context manager so the VCF handle opened just to list the sample names
# is closed instead of being leaked.
with open('calls.vcf.gz', 'r') as vcf_file:
    samples = vcf.Reader(vcf_file).samples

for sample in samples:
    with FastaVariant('reference.fasta', 'calls.vcf.gz', sample=sample, het=True, hom=True) as consensus:
        with open(sample + '.fasta', 'w') as sample_fasta:
            for record in consensus:
                # NOTE(review): only the header is written here, without a
                # trailing newline or the sequence -- confirm whether the
                # sequence is written by code not visible in this excerpt.
                sample_fasta.write('>' + record.long_name)
@mdshw5
mdshw5 / answer.py
Created April 8, 2016 00:57
biostars 183260
from pyfaidx import FastaVariant
# Print the short sequence window around every variant site of every
# chromosome in the variant-substituted genome.
with FastaVariant('genome.fasta', 'tabix_indexed_variants.vcf.gz', het=True, hom=True) as consensus:
    for chromosome in consensus:
        for site in chromosome.variant_sites:
            # Window bounds: from two before the site value up to (excluding)
            # one after it, i.e. three bases for sites away from the start.
            window_start, window_end = site - 2, site + 1
            flanking = chromosome[window_start:window_end]
            ## do something with flanking sequence
            print(flanking.seq)  ## ATG
@mdshw5
mdshw5 / answer.sh
Last active March 25, 2016 13:45
biostars 183279
# Install pyfaidx, which provides the `faidx` command used below.
pip install pyfaidx
# Tabulate per-sequence nucleotide counts for giant.fasta.
faidx --transform nucleotide giant.fasta > base_counts.txt
# Keep the name (column 1) of each sequence whose column 8 (the N count in the
# faidx nucleotide table) is 0. The test is an awk pattern, so only matching
# rows are printed; an `if (...);` with a stray semicolon would print every row.
awk '$8 == 0 {print $1}' base_counts.txt > seqs_without_n.txt
# Extract the selected sequences from the original FASTA by name.
xargs faidx giant.fasta < seqs_without_n.txt