Skip to content

Instantly share code, notes, and snippets.

View endrebak's full-sized avatar
🧬
Trying to write a genomic library in Rust

endrebak.ada endrebak

🧬
Trying to write a genomic library in Rust
View GitHub Profile
from typing import List
import polars as pl
import numpy as np
import bioframe.core.arrops as arrops
import pyoframe as pf
import polars as pl
import pyranges as pr
gr = pr.PyRanges(chromosomes=["chr1"] * 2 + ["chr2"] * 2, starts=[0, 100, 200, 300], ends=[50, 150, 250, 350], strands=["+", "+", "-", "-"])
gr.spliced_subsequence(0, -75, by="gene")
+--------------+-----------+-----------+--------------+------------+
| Chromosome | Start | End | Strand | gene |
| (category) | (int64) | (int32) | (category) | (object) |
|--------------+-----------+-----------+--------------+------------|
| chr1 | 0 | 25 | + | 1 |
function rest(dag) {
layering = d3.layeringSimplex()
decrossing = d3.decrossOpt()
coords = d3.coordQuad()
{
"global": {
"check_for_updates_on_startup": true,
"show_in_menu_bar": true,
"show_profile_name_in_menu_bar": false
},
"profiles": [
{
"complex_modifications": {
"parameters": {
@numba.jit
def ucorrelate(t, u, maxlag=None):
"""Compute correlation of two signals defined at uniformly-spaced points.
The correlation is defined only for positive lags (including zero).
The input arrays represent signals defined at uniformily-spaced
points. This function is equivalent to :func:`numpy.correlate`, but can
efficiently compute correlations on a limited number of lags.
Note that binning point-processes with uniform bins, provides
signals that can be passed as argument to this function.
Arguments:
half_window_size = 3
len_haps = 10
# half_window_size += 1
for i in range(0, half_window_size):
for j in range(i):
j1 = i - j
#!/usr/bin/env python3
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
import sys, math, gzip
import numpy as np
import pandas as pd
from time import time
from libc.math cimport exp, fabs
#!/usr/bin/env python3
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
import sys, math, gzip
import numpy as np
import pandas as pd
from time import time
from libc.math cimport exp, fabs
#!/usr/bin/env python3
#cython: language_level=3, boundscheck=False, wraparound=False, initializedcheck=False, cdivision=True
import sys, math, gzip
import numpy as np
# cimport numpy as cnp
import pandas as pd
from time import time
import pyranges_db as db
import pyranges as pr
gr = db.gencode.genes("human") # takes a while to download from ftp
# Wall time: 2min 1s
gr.to_gtf("gencode_human.gtf.gz") # takes a while to gzip and write to disk
# Wall time: 4min 3s
# subset for faster operations