Skip to content

Instantly share code, notes, and snippets.

Avatar
🧬
Trying to write genomic library in Rust

endrebak.ada endrebak

🧬
Trying to write genomic library in Rust
View GitHub Profile
View pyranges.py
import pyranges as pr
gr = pr.PyRanges(chromosomes=["chr1"] * 2 + ["chr2"] * 2, starts=[0, 100, 200, 300], ends=[50, 150, 250, 350], strands=["+", "+", "-", "-"])
gr.spliced_subsequence(0, -75, by="gene")
+--------------+-----------+-----------+--------------+------------+
| Chromosome | Start | End | Strand | gene |
| (category) | (int64) | (int32) | (category) | (object) |
|--------------+-----------+-----------+--------------+------------|
| chr1 | 0 | 25 | + | 1 |
View chart.js
function rest(dag) {
layering = d3.layeringSimplex()
decrossing = d3.decrossOpt()
coords = d3.coordQuad()
View karabiner
{
"global": {
"check_for_updates_on_startup": true,
"show_in_menu_bar": true,
"show_profile_name_in_menu_bar": false
},
"profiles": [
{
"complex_modifications": {
"parameters": {
View ucorrelate.py
@numba.jit
def ucorrelate(t, u, maxlag=None):
"""Compute correlation of two signals defined at uniformly-spaced points.
The correlation is defined only for positive lags (including zero).
The input arrays represent signals defined at uniformly-spaced
points. This function is equivalent to :func:`numpy.correlate`, but can
efficiently compute correlations on a limited number of lags.
Note that binning point-processes with uniform bins provides
signals that can be passed as arguments to this function.
Arguments:
View test_pyramid.py
half_window_size = 3
len_haps = 10
# half_window_size += 1
for i in range(0, half_window_size):
for j in range(i):
j1 = i - j
View calc_autocovar.pyx
#!/usr/bin/env python3
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
import sys, math, gzip
import numpy as np
import pandas as pd
from time import time
from libc.math cimport exp, fabs
View calc_covar.pyx
#!/usr/bin/env python3
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
import sys, math, gzip
import numpy as np
import pandas as pd
from time import time
from libc.math cimport exp, fabs
View calc_covar.pyx
#!/usr/bin/env python3
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
import sys, math, gzip
import numpy as np
# cimport numpy as cnp
import pandas as pd
from time import time
View get_lncrna_overlaps_and_nearest.py
import pyranges_db as db
import pyranges as pr
gr = db.gencode.genes("human") # takes a while to download from ftp
# Wall time: 2min 1s
gr.to_gtf("gencode_human.gtf.gz") # takes a while to gzip and write to disk
# Wall time: 4min 3s
# subset for faster operations
View fisher_exact_gene_biotypes.py
rule gene_biotypes:
input:
regions = gene_biotype_infiles,
annotation = "{prefix}/data/{genome}/annotation.tsv"
output:
"{prefix}/data/{genome}/{hmm_or_anatomy}_regions/{cutoff}/gene_biotype_counts.tsv"
run:
df = pd.read_table(input.regions, header=0)
df2 = pd.read_csv(input.annotation, header=0, sep=",")