Skip to content

Instantly share code, notes, and snippets.

View natekupp's full-sized avatar

Nate Kupp natekupp

View GitHub Profile
@natekupp
natekupp / gist:1763661
Created February 8, 2012 00:55
Python B-Trees
class BTreeNode(object):
"""A B-Tree Node.
attributes
=====================
leaf : boolean, determines whether this node is a leaf.
keys : list, a list of keys internal to this node
c : list, a list of children of this node
"""
def __init__(self, leaf=False):
import datetime
import os
from airflow.contrib.operators import dataproc_operator
from airflow.contrib.operators.bigquery_operator import BigQueryOperator
from airflow.contrib.operators.gcs_to_bq import GoogleCloudStorageToBigQueryOperator
from airflow.models import DAG
from airflow.utils import trigger_rule
PROJECT_ID = os.getenv('GCP_PROJECT_ID')
@natekupp
natekupp / hadoop_fixup.diff
Created March 15, 2020 03:12
macOS OpenSSL Fix for Hadoop Pipes XML
diff --git a/hadoop-tools/hadoop-pipes/pom.xml b/hadoop-tools/hadoop-pipes/pom.xml
index 70f66f973d4..5063da640cc 100644
--- a/hadoop-tools/hadoop-pipes/pom.xml
+++ b/hadoop-tools/hadoop-pipes/pom.xml
@@ -54,6 +54,9 @@
<source>${basedir}/src</source>
<vars>
<JVM_ARCH_DATA_MODEL>${sun.arch.data.model}</JVM_ARCH_DATA_MODEL>
+ <OPENSSL_INCLUDE_DIR>/usr/local/Cellar/openssl/1.0.2t/include</OPENSSL_INCLUDE_DIR>
+ <OPENSSL_SSL_LIBRARY>/usr/local/Cellar/openssl/1.0.2t/lib/libssl.dylib</OPENSSL_SSL_LIBRARY>
# example_airflow.py
from dagster import lambda_solid, pipeline
@lambda_solid
def hello_world():
    """Return the fixed greeting string ``"hello, world"``.

    Takes no inputs; the returned value is the only output of this
    ``lambda_solid``-decorated function.
    """
    greeting = "hello, world"
    return greeting
@pipeline
def basic_pipeline():
    """Define a pipeline whose body consists of a single ``hello_world`` call.

    The function returns nothing itself; the ``@pipeline`` decorator is
    what consumes the call made in the body.
    """
    hello_world()
@natekupp
natekupp / dask_exception.py
Last active May 16, 2019 04:18
Minimal repro example of Dask exception
import dask
import dask.distributed
from tornado import gen
PUBSUB_NAME = 'test'
class Task:
def __init__(self, key, num, upstream_tasks):
class DataProcSparkOperator(BaseOperator):
"""
Start a Cloud DataProc cluster, run a Spark job, then shut down the Spark cluster.
"""
template_fields = ['arguments']
ui_color = '#0273d4'
@apply_defaults
def __init__(
self,
@natekupp
natekupp / gist:2954743
Created June 19, 2012 15:21
numpy_imshow
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Terminal visualization of 2D numpy arrays
# Copyright (c) 2009 Nicolas P. Rougier
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
@natekupp
natekupp / gist:2628399
Created May 7, 2012 15:26
running node.js and simple HTTP server
try:
# start http server
self.p_http = subprocess.Popen(
["python", "-m", "SimpleHTTPServer"],
preexec_fn = sigint_replace,
cwd = self.resource_dir
)
# start node.js server
self.p_node = subprocess.Popen(
@natekupp
natekupp / gist:2090168
Created March 19, 2012 01:55
Installation for py2cairo on OS X Lion 10.7
python waf clean
export PYTHONPATH=/Library/Frameworks/Python.framework/Versions/2.7/
export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/2.7/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/2.7/lib:$LD_LIBRARY_PATH
export LINKFLAGS='-search_dylibs_first -L /Library/Frameworks/Python.framework/Versions/2.7/lib/'
export ARCHFLAGS='-arch x86_64'
export CC=/usr/bin/gcc-4.2
export PKG_CONFIG_PATH=/usr/local/Cellar/cairo/1.10.2/lib/pkgconfig/
python waf configure --prefix=$PYTHONPATH
@natekupp
natekupp / gist:2047390
Created March 15, 2012 22:33
FFX Performance Test
def buildmodel():
EPS = 0.001
data = pandas.read_csv('iris.csv')
xtrain = data.ix[:50,0:2]
xtest = data.ix[51:100,0:2]
ytrain = data.ix[:50,2]
ytest = data.ix[51:100,2]
# Use pandas.DataFrame
models = ffx.run(xtrain, ytrain, xtest, ytest)