Skip to content

Instantly share code, notes, and snippets.

Nate Kupp natekupp

View GitHub Profile
@natekupp
natekupp / hadoop_fixup.diff
Created Mar 15, 2020
MacOS OpenSSL Fix for Hadoop Pipes XML
View hadoop_fixup.diff
diff --git a/hadoop-tools/hadoop-pipes/pom.xml b/hadoop-tools/hadoop-pipes/pom.xml
index 70f66f973d4..5063da640cc 100644
--- a/hadoop-tools/hadoop-pipes/pom.xml
+++ b/hadoop-tools/hadoop-pipes/pom.xml
@@ -54,6 +54,9 @@
<source>${basedir}/src</source>
<vars>
<JVM_ARCH_DATA_MODEL>${sun.arch.data.model}</JVM_ARCH_DATA_MODEL>
+ <OPENSSL_INCLUDE_DIR>/usr/local/Cellar/openssl/1.0.2t/include</OPENSSL_INCLUDE_DIR>
+ <OPENSSL_SSL_LIBRARY>/usr/local/Cellar/openssl/1.0.2t/lib/libssl.dylib</OPENSSL_SSL_LIBRARY>
View example_airflow.py
# example_airflow.py
from dagster import lambda_solid, pipeline
@lambda_solid
def hello_world():
    """Return the greeting string ``"hello, world"``.

    Takes no arguments. The function is wrapped by the ``lambda_solid``
    decorator imported from ``dagster`` at the top of this snippet.
    """
    return "hello, world"
@pipeline
def basic_pipeline():
    """Define a pipeline whose body invokes ``hello_world``.

    Takes no arguments and returns nothing itself; the function is wrapped
    by the ``pipeline`` decorator imported from ``dagster``.
    """
    hello_world()
View example_gcp_data_pipeline.py
import datetime
import os
from airflow.contrib.operators import dataproc_operator
from airflow.contrib.operators.bigquery_operator import BigQueryOperator
from airflow.contrib.operators.gcs_to_bq import GoogleCloudStorageToBigQueryOperator
from airflow.models import DAG
from airflow.utils import trigger_rule
PROJECT_ID = os.getenv('GCP_PROJECT_ID')
@natekupp
natekupp / dask_exception.py
Last active May 16, 2019
Minimal repro example of Dask exception
View dask_exception.py
import dask
import dask.distributed
from tornado import gen
PUBSUB_NAME = 'test'
class Task:
def __init__(self, key, num, upstream_tasks):
View dataproc_spark_operator.py
class DataProcSparkOperator(BaseOperator):
"""
Start a Cloud DataProc cluster, run a Spark job, then shut down the Spark cluster.
"""
template_fields = ['arguments']
ui_color = '#0273d4'
@apply_defaults
def __init__(
self,
@natekupp
natekupp / example.py
Created Sep 26, 2015
impyla+multiprocessing crash example
View example.py
import multiprocessing
import impala.dbapi
class Impala(object):
def __init__(self, host, database, port, username, password, lock=None):
self.conn = impala.dbapi.connect(host=host,
port=port,
database=database,
user=username,
password=password,
View gist:2954743
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Terminal visualization of 2D numpy arrays
# Copyright (c) 2009 Nicolas P. Rougier
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
@natekupp
natekupp / gist:2628399
Created May 7, 2012
running node.js and simple HTTP server
View gist:2628399
try:
# start http server
self.p_http = subprocess.Popen(
["python", "-m", "SimpleHTTPServer"],
preexec_fn = sigint_replace,
cwd = self.resource_dir
)
# start node.js server
self.p_node = subprocess.Popen(
@natekupp
natekupp / gist:2090168
Created Mar 19, 2012
Installation for py2cairo on OSX Lion 10.7
View gist:2090168
python waf clean
export PYTHONPATH=/Library/Frameworks/Python.framework/Versions/2.7/
export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/2.7/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/Library/Frameworks/Python.framework/Versions/2.7/lib:$LD_LIBRARY_PATH
export LINKFLAGS='-search_dylibs_first -L /Library/Frameworks/Python.framework/Versions/2.7/lib/'
export ARCHFLAGS='-arch x86_64'
export CC=/usr/bin/gcc-4.2
export PKG_CONFIG_PATH=/usr/local/Cellar/cairo/1.10.2/lib/pkgconfig/
python waf configure --prefix=$PYTHONPATH
@natekupp
natekupp / gist:2047390
Created Mar 15, 2012
FFX Performance Test
View gist:2047390
def buildmodel():
EPS = 0.001
data = pandas.read_csv('iris.csv')
xtrain = data.ix[:50,0:2]
xtest = data.ix[51:100,0:2]
ytrain = data.ix[:50,2]
ytest = data.ix[51:100,2]
# Use pandas.DataFrame
models = ffx.run(xtrain, ytrain, xtest, ytest)
You can’t perform that action at this time.