Skip to content

Instantly share code, notes, and snippets.

View chrisjurich's full-sized avatar

chrisjurich

View GitHub Profile
import math
def stat_test(lower_avg, lower_err, upper_avg, upper_err):
    """Placeholder for a statistical test; always raises.

    This is not a real function yet. Once implemented it should return a
    p-value for the hypothesis that ``lower_avg < upper_avg``.

    Args:
        lower_avg: Average of the group expected to be lower.
        lower_err: Error associated with ``lower_avg`` (presumably a
            standard error or deviation -- TODO confirm once implemented).
        upper_avg: Average of the group expected to be higher.
        upper_err: Error associated with ``upper_avg`` (same caveat).

    Raises:
        TypeError: Always, with the message ``"not implemented"``.
    """
    # NOTE: NotImplementedError would be the conventional choice, but the
    # exception type is kept as TypeError so existing callers that catch it
    # keep working.
    raise TypeError("not implemented")
from scipy.stats import mannwhitneyu
# Source: https://www.biorxiv.org/content/10.1101/2020.06.29.178343v2.full.pdf
def dsci( sequence, target, dms ):
# TODO remember, have to replace the dead nt's with N
assert len( sequence ) == len( target ) and len( target ) == len( dms )
# first, gotta do the paired/unpaired
paired, unpaired = [], []
for nt, db, val in zip( sequence, target, dms):
from enum import Enum
from abc import ABC, abstractmethod
from multipledispatch import dispatch
class MotifType(Enum):
    """Closed set of motif categories, each mapped to a fixed integer value.

    NOTE(review): the member names suggest nucleic-acid secondary-structure
    motifs -- confirm against the code that consumes this enum.
    """

    # The integer values are explicit; keep them stable in case anything
    # relies on ``MotifType(<int>)`` lookups or on stored values.
    SINGLESTRAND = 0
    HELIX = 1
    HAIRPIN = 2
    JUNCTION = 3
#include<tuple>
#include<vector>
#include<map>
#include<iostream>
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////
// NOTE: you have to pass the flag "-std=c++17" to the compiler.
///////////////////////////////////////////////////////////////////
import urllib.request
from html.parser import HTMLParser
from bs4 import BeautifulSoup
import re
import datetime
###################################################################################
####################### HELPER FUNCTIONS ##########################################
def get_date_from_href(href):
"""Helper method that gets date from a hypertext link. Assumes format of MM/DD/YYYY. Raises error if number of matches != 1"""
def blended_eterna_score(row,scale_max=10):
"""Method that finds the blended, non-binary eterna score for a structure. Takes the row from a pandas df as input"""
assert len(row["target_structure"]) == len(row["sequence"])
# Sometimes there is a fingerprint sequence at the end of the structure; if that is the case, it needs to be removed.
sequence = re.sub("AAAGAAACAACAACAACAAC$","",row["sequence"])
# data_len is the number of data points that will be reviewed
data_len = min(
len(row["target_structure"]),
len(row["SHAPE_data"]), # can probably get rid of this one
len(sequence),
def eterna_recalculated(row,scale_max=2.3):
"""Helper method that recalculates the Eterna score for an entry from a dataframe. It will then put the score back into the row. Please note that there is not a 1:1 correspondence between the actual and recalculated scores"""
assert len(row["target_structure"]) == len(row["sequence"])
# Sometimes there is a fingerprint sequence at the end of the structure; if that is the case, it needs to be removed.
sequence = re.sub("AAAGAAACAACAACAACAAC$","",row["sequence"])
# data_len is the number of data points that will be reviewed
data_len = min(
len(row["target_structure"]),
len(row["SHAPE_data"]), # can probably get rid of this one
len(sequence),
def is_header_file(file_name):
    """Return True when *file_name* has a C++ header-file extension.

    The extension is the text after the last ``.`` in ``file_name``. It is
    compared case-insensitively against ``h``, ``hh``, ``hpp``, ``hxx`` and
    ``h++``.

    Args:
        file_name: File name or path as a string.

    Returns:
        True if the extension is one of the recognised header endings,
        otherwise False. A name containing no ``.`` has no extension and is
        never reported as a header.
    """
    _, dot, file_ending = file_name.rpartition('.')
    if not dot:
        # Without this guard an extension-less file literally named "h" or
        # "hpp" would be treated as its own extension and match.
        return False
    return file_ending.lower() in {"h", "hh", "hpp", "hxx", "h++"}
def is_source_file(file_name):
"""Method that checks if the file is a C++ source file"""
#include <iostream>
#include <vector>
struct Strings : std::vector<std::string> {
std::vector<std::string> tokens;
// unfortunately you have to override all of the constructors
Strings(std::vector<std::string> input_tokens) : tokens(std::move(input_tokens)) {}
#include <iostream>
#include <set>
#include <map>
bool
is_num(std::string& string_token, std::set<char>& accepted_characters){
auto accepted_chars = std::map<char,int>();
for(const auto& character : string_token) {
if(accepted_characters.find(character) == accepted_characters.end()) {
if(character < '0' or character > '9') {