Skip to content

Instantly share code, notes, and snippets.

View zmjjmz's full-sized avatar

Zachary Jablons zmjjmz

View GitHub Profile
@zmjjmz
zmjjmz / documented_example.py
Created July 11, 2019 22:02
TF keras export usage
import tensorflow as tf
# Create a tf.keras model.
print(tf.version.VERSION)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_shape=[10]))
model.summary()
# Save the tf.keras model in the SavedModel format.
saved_to_path = tf.keras.experimental.export(
@zmjjmz
zmjjmz / breakage
Created April 30, 2019 21:40
from_json nightmare
An error occurred while calling o4971.count.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 3 in stage 255.0 failed 4 times, most recent failure: Lost task 3.3 in stage 255.0 (TID 840, ip-172-32-98-36.ec2.internal, executor 1): java.lang.ClassCastException: java.lang.Boolean cannot be cast to java.lang.String
at org.apache.spark.sql.catalyst.json.JSONOptions$$anonfun$27.apply(JSONOptions.scala:84)
at scala.Option.map(Option.scala:146)
at org.apache.spark.sql.catalyst.json.JSONOptions.<init>(JSONOptions.scala:84)
at org.apache.spark.sql.catalyst.json.JSONOptions.<init>(JSONOptions.scala:43)
at org.apache.spark.sql.catalyst.expressions.JsonToStructs.parser$lzycompute(jsonExpressions.scala:555)
at org.apache.spark.sql.catalyst.expressions.JsonToStructs.parser(jsonExpressions.scala:552)
at org.apache.spark.sql.catalyst.expressions.JsonToStructs.nullSafeEval(jsonExpressions.scala:585)
at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:331)
@zmjjmz
zmjjmz / main log
Created April 15, 2019 20:35
batch transform logs
[2019-04-15 20:17:14 +0000] [22] [INFO] Starting gunicorn 19.9.0
[2019-04-15 20:17:14 +0000] [22] [INFO] Listening at: unix:/tmp/gunicorn.sock (22)
[2019-04-15 20:17:14 +0000] [22] [INFO] Using worker: gevent
[2019-04-15 20:17:14 +0000] [33] [INFO] Booting worker with pid: 33
[2019-04-15 20:17:14 +0000] [34] [INFO] Booting worker with pid: 34
[2019-04-15 20:17:14 +0000] [42] [INFO] Booting worker with pid: 42
[2019-04-15 20:17:14 +0000] [50] [INFO] Booting worker with pid: 50
[2019-04-15 20:17:15 +0000] [52] [INFO] Booting worker with pid: 52
[2019-04-15 20:17:15 +0000] [54] [INFO] Booting worker with pid: 54
[2019-04-15 20:17:15 +0000] [62] [INFO] Booting worker with pid: 62
@zmjjmz
zmjjmz / failure_stacktrace
Created April 5, 2019 21:03
new glue failure
An error was encountered:
Session 0 unexpectedly reached final status 'dead'. See logs:
stdout:
stderr:
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/share/aws/glue/etl/jars/glue-assembly.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/lib/spark/jars/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
@zmjjmz
zmjjmz / repro.py
Last active April 4, 2019 19:52
PyArrow chunked array output thingie
import os
import pandas as pd
import pyarrow.parquet as pq
import pyarrow as pa
import numpy as np
from tqdm import tqdm
TEST_DIR = 'jaggedbug_testpath'
@zmjjmz
zmjjmz / tokenize_layer_tf2.py
Last active March 8, 2019 23:15
TF2 Lookup Table attempt
import itertools
import numpy
import tensorflow
class TokenizeLookupLayer(tensorflow.keras.layers.Layer):
"""
Layer that encapsulates the following:
- Tokenizing sentences by space (or given delimiter)
- Looking up the words with a given vocabulary list / table
@zmjjmz
zmjjmz / tf2_upgrade_test.py
Created March 7, 2019 23:39
TF2 Upgrade Script testing - part 1
import itertools
import numpy
import tensorflow
class TokenizeLookupLayer(tensorflow.keras.layers.Layer):
"""
Layer that encapsulates the following:
- Tokenizing sentences by space (or given delimiter)
- Looking up the words with a given vocabulary list / table
@zmjjmz
zmjjmz / endtoend_estimator_keras_repro.py
Created January 26, 2018 16:33
Keras estimator bug repro
from __future__ import division
import os
import numpy
import tensorflow
from tensorflow.python.keras._impl import keras
from tensorflow.python.estimator.export.export_output import PredictOutput
from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn
from tensorflow.python.saved_model import signature_constants
@zmjjmz
zmjjmz / ml_utils.py
Created December 19, 2017 18:23
shitty lookup layer
class TokenizeLookupLayer(keras.layers.Layer):
"""
Layer that encapsulates the following:
- Tokenizing sentences by space (or given delimiter)
- Looking up the words with a given vocabulary list / table
- Resetting the shape of the above to be batch_size x pad_len (using dark magic)
# Input Shape
2D string tensor with shape `(batch_size, 1)`
# Output Shape
2D int32 tensor with shape `(batch_size, pad_len)`
@zmjjmz
zmjjmz / sagemaker_multiin_repro.py
Created September 11, 2018 23:28
Sagemaker Multi-input repro
import os
import json
import numpy
import tensorflow
from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn
print("Tensorflow version: {0}".format(tensorflow.VERSION))
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'