This document contains lessons learned regarding Databricks programming, as well as some general best practices.
# Azure ADLS Gen2 mount parameters for a Databricks workspace — presumably
# consumed together with the OAuth `configs` dict below by dbutils.fs.mount;
# TODO(review): confirm against the (truncated) mounting code.
blobname = "miraw"  # container / filesystem name — presumably; confirm
storageaccount = "rdmidlgen2"  # ADLS Gen2 storage account name
mountname = "/rdmi"  # DBFS mount-point path
configs = {"fs.azure.account.auth.type": "OAuth",
# $Id: vim-keys.conf,v 1.2 2010-09-18 09:36:15 nicm Exp $
#
# vim-keys.conf, v1.2 2010/09/12
#
# By Daniel Thau. Public domain.
#
# This configuration file binds many vi- and vim-like bindings to the
# appropriate tmux key bindings. Note that for many key bindings there is no
# tmux analogue. This is intended for tmux 1.3, which handles pane selection
# differently from the previous versions.
# Functions for parallelizing things
def init_spark(nproc=-1, appname="sparksession"):
    """Start (or reuse) a local SparkSession.

    Parameters
    ----------
    nproc : int
        Number of local cores to use; -1 (the default) means all cores
        (Spark master string "local[*]").
    appname : str
        Spark application name.

    Returns
    -------
    SparkSession
    """
    # Local import: keeps pyspark optional for callers that never use Spark.
    from pyspark.sql import SparkSession

    if nproc == -1:
        # Use all CPUs
        master = "local[*]"
    else:
        # NOTE(review): this branch was truncated in the source; reconstructed
        # as the conventional fixed-core master string — confirm against the
        # original file.
        master = "local[{}]".format(nproc)

    spark = SparkSession.builder.master(master).appName(appname).getOrCreate()
    return spark
# Run blastp: tabular output (fmt 6) with the standard 12 columns plus
# subject title and query coverage, on 10 threads.
# NOTE(review): the filenames look swapped (-db fasta.fa vs -query
# database.fa) — confirm which file is the formatted BLAST database.
blastp -db fasta.fa -query database.fa \
    -outfmt "6 std stitle qcovs" -num_threads 10 -out out.blast
#!/bin/bash
# Build a BLAST v5 database from a FASTA file.
# Usage: script.sh INFILE [extra makeblastdb options...]
# Environment: TYPE selects -dbtype (default: prot).
TYPE=${TYPE:-prot}
# Require an input file argument; bail out otherwise.
[[ -n "$1" ]] || exit 1
INFILE=$1
shift
# Quote expansions so paths with spaces survive; "$@" forwards any extra
# user-supplied makeblastdb flags verbatim.
makeblastdb -in "$INFILE" -dbtype "$TYPE" -parse_seqids "$@" -blastdb_version 5
#!/usr/bin/env python
# Third-party imports (Biopython, click, pandas), grouped alphabetically.
import click
import pandas as pd
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
@click.command() |
# List unique values in a DataFrame column
df.column_name.unique()
# Convert Series to numeric, coercing any non-numeric values to NaN.
# (Series.convert_objects was removed from pandas; pd.to_numeric is the
# supported replacement.)
df['col'] = pd.to_numeric(df['col'], errors='coerce')
# Grab DataFrame rows where column has one of the listed values
valuelist = ['value1', 'value2', 'value3']
df = df[df.column.isin(valuelist)]
def flatten_json(y):
    """Flatten a nested JSON-like structure into a single-level dict.

    Nested dict keys are joined with '_'; list elements contribute their
    index, e.g. {'a': {'b': 1}, 'c': [2]} -> {'a_b': 1, 'c_0': 2}.

    NOTE(review): the original body was truncated mid-list-branch in the
    source; the leaf handling, driver call, and return were reconstructed
    from the standard flatten_json recipe — confirm against the original.
    """
    out = {}

    def flatten(x, name=''):
        if isinstance(x, dict):
            for key in x:
                flatten(x[key], name + key + '_')
        elif isinstance(x, list):
            for i, item in enumerate(x):
                flatten(item, name + str(i) + '_')
        else:
            # Leaf value: strip the trailing '_' from the accumulated path.
            out[name[:-1]] = x

    flatten(y)
    return out
#!/usr/bin/env python
# Sequence alignment using PyMOL
# The purpose of this script is to generate a sequence alignment between
# the original crystal structure of the apo and holo models, and the sequence
# of the finalised, ungapped Rosetta models. This allows us to get a 1 to 1
# correspondence between the residue numberings in both structures.
# USAGE: Run once from the project root.
# "pockets.csv" contains the information about apo-holo pairs.
This document contains lessons learned regarding Databricks programming, as well as some general best practices.
# Azure ADLS Gen2 mount parameters for a Databricks workspace — presumably
# consumed together with the OAuth `configs` dict below by dbutils.fs.mount;
# TODO(review): confirm against the (truncated) mounting code.
blobname = "miraw"  # container / filesystem name — presumably; confirm
storageaccount = "rdmidlgen2"  # ADLS Gen2 storage account name
mountname = "/rdmi"  # DBFS mount-point path
configs = {"fs.azure.account.auth.type": "OAuth",