Skip to content

Instantly share code, notes, and snippets.

Avatar
🏠
Working from home

David Carlson mndrake

🏠
Working from home
View GitHub Profile
@mndrake
mndrake / create_cluster.py
Last active May 22, 2020
Databricks cluster creation and config for Databricks Connect
View create_cluster.py
#!python
import functools
import json
import os
import requests
import urllib
import uuid
import configparser
# TODO: CURRENTLY ONLY WORKS FOR AWS, NEED TO ADD ADDITIONAL PARSING FOR AZURE
@mndrake
mndrake / 00-setup.py
Last active May 2, 2020
Setup Jupyter kernel for Databricks dbconnect
View 00-setup.py
from IPython.core.magic import line_magic, line_cell_magic, Magics, magics_class
from pyspark.sql import SparkSession
from pyspark.dbutils import DBUtils
# Obtain a SparkSession through the builder (reuses an existing session or
# creates a new one).
spark = SparkSession.builder.getOrCreate()
# Shorthand for the session's underlying SparkContext.
sc = spark.sparkContext
# DBUtils handle constructed from the SparkContext above.
dbutils = DBUtils(sc)
@magics_class
class DatabricksConnectMagics(Magics):
@mndrake
mndrake / hocr_parse.py
Created Sep 12, 2019
HOCR output parsing from pytesseract
View hocr_parse.py
#!/usr/bin/env python
# coding: utf-8
# In[11]:
# dependencies
import pytesseract
from bs4 import BeautifulSoup
from PIL import Image
@mndrake
mndrake / .vimrc
Last active Jun 18, 2018
python 2.7 vim config
View .vimrc
"*****************************************************************************
"" Vim-PLug core
"*****************************************************************************
" Turn off Vi-compatible mode, but only while Vim is still starting up.
if has('vim_starting')
set nocompatible " Be iMproved
endif
" Comma-separated language list and editor name stored in global variables.
" NOTE(review): the list below has a space after "html," while the otherwise
" identical list in init.vim has none - confirm whether the consumer of this
" variable tolerates the space.
let g:vim_bootstrap_langs = "html, javascript,python,scala"
let g:vim_bootstrap_editor = "vim"
@mndrake
mndrake / init.vim
Last active Jun 17, 2018
neovim init.vim
View init.vim
"*****************************************************************************
"" Vim-PLug core
"*****************************************************************************
" Turn off Vi-compatible mode, but only while the editor is still starting up.
if has('vim_starting')
set nocompatible " Be iMproved
endif
" Expanded path of the plug.vim autoload script under the Neovim config
" directory. Despite its name, this variable holds the path string, not a
" boolean existence flag.
let vimplug_exists=expand('~/.config/nvim/autoload/plug.vim')
" Comma-separated language list stored in a global variable.
let g:vim_bootstrap_langs = "html,javascript,python,scala"
@mndrake
mndrake / start.sh
Last active Jun 17, 2018
domino start script for jupyterlab
View start.sh
#!/bin/bash
# Strict mode: error on unset variables, exit on the first failing command,
# and treat a pipeline as failed if any stage in it fails.
set -o nounset -o errexit -o pipefail
# Pull the address out of the "inet addr:<ip>" line that ifconfig prints for
# eth0: take the text after the first ":" and keep it up to the next space.
# NOTE(review): only matches ifconfig output that contains the literal
# "inet addr" - confirm the target image prints that format.
IP_ADDR=$(/sbin/ifconfig eth0 | grep "inet addr" | cut -d ":" -f2 | cut -d " " -f1)
# Default IPython profile directory and the notebook config file inside it.
CONF_DIR="$HOME/.ipython/profile_default"
CONF_FILE="${CONF_DIR}/ipython_notebook_config.py"
# Create the profile directory (and any missing parents) if it does not exist.
mkdir -p "${CONF_DIR}"
cat <<EOF >>"${CONF_FILE}"
@mndrake
mndrake / parquet_split.py
Created May 27, 2018
read/write to split parquet files
View parquet_split.py
import os
from io import BytesIO
import pyarrow as pa
import pyarrow.parquet as pq
# Size units, expressed in bytes. Binary multiples are used consistently:
# a kilobyte is 1024 bytes, so a megabyte is 1024 kilobytes. (The earlier
# 1024 * 1000 mixed binary and decimal units and matched neither MB nor MiB.)
kilobytes = 1024
megabytes = kilobytes * 1024
# Chunk size in bytes (10 MiB). int() keeps the value integral even if the
# multiplier is later changed to a fractional number.
chunksize = int(10 * megabytes)
@mndrake
mndrake / shiny_leaflet_brushing.R
Created Mar 29, 2017
Interactive Polygon Brushing with Shiny and Leaflet
View shiny_leaflet_brushing.R
# originally from: http://stackoverflow.com/questions/42528400/plot-brushing-or-accessing-drawn-shape-geometry-for-spatial-subsets-in-shiny-lea
# uses https://github.com/bhaskarvk/leaflet.extras
library(shiny)
library(leaflet)
library(leaflet.extras)
library(sp)
cities <- structure(list(AccentCity = c("Saint Petersburg", "Harare", "Qingdao",
"Addis Abeba", "Xian", "Anshan", "Rongcheng", "Kinshasa", "New York",
View server.R
library(shiny)
shinyServer(function(input, output, session) {
# Return the components of the URL in a string:
output$urlText <- renderText({
paste(sep = "",
"protocol: ", session$clientData$url_protocol, "\n",
"hostname: ", session$clientData$url_hostname, "\n",
"pathname: ", session$clientData$url_pathname, "\n",
@mndrake
mndrake / dplyr_sql_extension.R
Last active Aug 24, 2016
extension methods for dplyr remote tables
View dplyr_sql_extension.R
library(dplyr)
library(nycflights13)
# Source object returned by nycflights13_sqlite()
# (presumably a SQLite-backed copy of the nycflights13 data - confirm).
conn <- nycflights13_sqlite()
# Table references for the 'flights' and 'weather' tables of that source.
flights_sql <- tbl(conn, 'flights')
weather_sql <- tbl(conn, 'weather')
# `[` method for objects of class tbl_sql: `x[condition]` passes `condition`
# straight to select_() and returns its result.
`[.tbl_sql` <- function(x, condition) {
  select_(x, condition)
}