Skip to content

Instantly share code, notes, and snippets.

View mndrake's full-sized avatar
🏠
Working from home

David Carlson mndrake

🏠
Working from home
View GitHub Profile
@mndrake
mndrake / hocr_parse.py
Created September 12, 2019 15:23
HOCR output parsing from pytesseract
#!/usr/bin/env python
# coding: utf-8
# In[11]:
# dependencies
import pytesseract
from bs4 import BeautifulSoup
from PIL import Image
@mndrake
mndrake / parquet_split.py
Created May 27, 2018 13:20
read/write to split parquet files
import os
from io import BytesIO
import pyarrow as pa
import pyarrow.parquet as pq
kilobytes = 1024
megabytes = kilobytes * 1000
chunksize = int(10 * megabytes)
@mndrake
mndrake / anomaly_comments_recipe.py
Last active June 3, 2022 17:51
Extract Cluster Comments for Anomaly
from urllib.parse import urlparse
import re
import dataiku
import pandas as pd
PROJECT_ID = 'CUSTOMERSEGMENTATION'
ANALYSIS_ID = 'UjW24hJ1'
ML_TASK_ID = 'LsiobCLw'
MODEL_ID = 'A-CUSTOMERSEGMENTATION-UjW24hJ1-LsiobCLw-s1-pp1-m1'
@mndrake
mndrake / shiny_leaflet_brushing.R
Created March 29, 2017 00:39
Interactive Polygon Brushing with Shiny and Leaflet
# originally from: http://stackoverflow.com/questions/42528400/plot-brushing-or-accessing-drawn-shape-geometry-for-spatial-subsets-in-shiny-lea
# uses https://github.com/bhaskarvk/leaflet.extras
library(shiny)
library(leaflet)
library(leaflet.extras)
library(sp)
cities <- structure(list(AccentCity = c("Saint Petersburg", "Harare", "Qingdao",
"Addis Abeba", "Xian", "Anshan", "Rongcheng", "Kinshasa", "New York",
from io import StringIO
import logging
class StreamingLog(object):
def __init__(self, logger_name, level=logging.INFO):
self.stream = StringIO()
self.handler = logging.StreamHandler(self.stream)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
self.handler.setFormatter(formatter)
self.log = logging.getLogger(logger_name)
@mndrake
mndrake / add_packages.R
Created March 22, 2022 16:22
miniCRAN Example
library(miniCRAN)
library(remotes)
# CRAN mirror to use (recommend checkpoint date for installed version of R https://mran.microsoft.com/timemachine)
cran_repo <- c(CRAN = "https://cran.microsoft.com/snapshot/2018-11-30")
# local path to create miniCRAN repo
miniCRAN_dir <- "/data/dataiku/miniCRAN"
@mndrake
mndrake / sas_export.py
Last active August 13, 2020 03:04
SAS dataset to sqlite wrapper of the sas7bdat python package
#!/usr/bin/python
# Filename: sas_export.py
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 06 18:40:09 2015
@author: David Carlson
Modified version, for sas7bdat 2.0.1, of Charlie Huang's version at:
http://www.sasanalysis.com/2014/08/python-extension-functions-to-translate.html
"""
@mndrake
mndrake / create_cluster.py
Last active May 22, 2020 19:05
Databricks cluster creation and config for Databricks Connect
#!python
import functools
import json
import os
import requests
import urllib
import uuid
import configparser
# TODO: CURRENTLY ONLY WORKS FOR AWS, NEED TO ADD ADDITIONAL PARSING FOR AZURE
@mndrake
mndrake / 00-setup.py
Last active May 2, 2020 00:22
Set up Jupyter kernel for Databricks dbconnect
from IPython.core.magic import line_magic, line_cell_magic, Magics, magics_class
from pyspark.sql import SparkSession
from pyspark.dbutils import DBUtils
spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext
dbutils = DBUtils(sc)
@magics_class
class DatabricksConnectMagics(Magics):
@mndrake
mndrake / Excel4.fs
Last active March 11, 2020 09:46
An F# wrapper class for the Excel4/Excel12 methods contained in Excel-DNA to mimic basic methods of the COM object model
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
namespace Utility