This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
import shutil | |
from tqdm import tqdm | |
def download_file(url: str, file_path: str) -> None: | |
""" | |
Downloads a file from the given URL and saves it to the specified file path. | |
Shows a progress bar using the tqdm library. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def print_columns(data: list, num_cols : int): | |
""" | |
Prints the elements of a list in columns. | |
Args: | |
data: A list of data to be printed. | |
num_cols: The number of columns to print. | |
Returns: | |
None |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# drop highly correlated features (absolute correlation above a threshold) in order to reduce the number of features | |
import numpy as np | |
def drop_covariance(df, threshold=0.95): | |
# Create correlation matrix | |
corr_matrix = df.corr().abs() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# read more about simhash: | |
# http://matpalm.com/resemblance/simhash/ | |
# | |
from collections import Counter | |
from itertools import chain | |
from toolz import pipe | |
import numpy as np | |
global HASH_SIZE |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
http://rocksdb.org | |
sudo apt-get install librocksdb-dev | |
pip3 install Cython python-rocksdb | |
""" | |
import rocksdb, gc | |
import numpy as np | |
rocksdb_fm_path = 'test.db' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
```{r setup, include=TRUE} | |
# clean up R environment | |
rm(list = ls(all = TRUE)) | |
# permanently setting the CRAN repository | |
options(repos = getOption("repos")["CRAN"]) | |
if (!requireNamespace("pacman", quietly = TRUE)) | |
install.packages("pacman") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip3 install pygsheets --user | |
import pygsheets | |
import sys | |
# Get user's authorization as a Service Account | |
# Turn on Google Drive and Google Sheets API | |
SPREADSHEET_NAME = 'ABCD' | |
key = './pygsheets-348111-b94d5f8fa9f2.json' | |
gc = pygsheets.authorize(service_file=key) | |
# do prior to running the script: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import modin.pandas as pd | |
from modin.config import Engine | |
# Engine.put("ray") # Modin will use Ray | |
Engine.put("dask") # Modin will use Dask |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%%writefile bed2bins.py | |
# instead of https://bedtools.readthedocs.io/en/latest/content/tools/makewindows.html | |
# cat file_name.bam | bedtools makewindows -b - -n 5 -i winnum | |
# | |
# How to use: | |
# cat file_name.bam | python3 bed2bins.py 5 | |
# | |
# expects input on stdin from a bam file with 6 fields, i.e., | |
# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# load a TSV file whose rows contain varying numbers of float values as numpy arrays | |
import numpy as np | |
from itertools import zip_longest | |
file_name = # specify | |
with open(file_name) as file: | |
lines = [np.asarray(line.rstrip().split('\t')).astype(np.float16) for line in file] | |