Skip to content

Instantly share code, notes, and snippets.

View zmjjmz's full-sized avatar

Zachary Jablons zmjjmz

View GitHub Profile
@zmjjmz
zmjjmz / tokenize_layer_tf2.py
Last active March 8, 2019 23:15
TF2 Lookup Table attempt
import itertools
import numpy
import tensorflow
class TokenizeLookupLayer(tensorflow.keras.layers.Layer):
"""
Layer that encapsulates the following:
- Tokenizing sentences by space (or given delimiter)
- Looking up the words with a given vocabulary list / table
@zmjjmz
zmjjmz / repro.py
Last active April 4, 2019 19:52
PyArrow chunked array output thingie
import os
import pandas as pd
import pyarrow.parquet as pq
import pyarrow as pa
import numpy as np
from tqdm import tqdm
TEST_DIR = 'jaggedbug_testpath'
@zmjjmz
zmjjmz / gluedevendpt.py
Last active October 15, 2020 14:34
glue_devendpt
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from pyspark.sql import functions as sf
from pyspark.sql import types as st
from awsglue.dynamicframe import DynamicFrame
@zmjjmz
zmjjmz / failure_stacktrace
Created April 5, 2019 21:03
new glue failure
An error was encountered:
Session 0 unexpectedly reached final status 'dead'. See logs:
stdout:
stderr:
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/share/aws/glue/etl/jars/glue-assembly.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/lib/spark/jars/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
@zmjjmz
zmjjmz / main log
Created April 15, 2019 20:35
batch transform logs
[2019-04-15 20:17:14 +0000] [22] [INFO] Starting gunicorn 19.9.0
[2019-04-15 20:17:14 +0000] [22] [INFO] Listening at: unix:/tmp/gunicorn.sock (22)
[2019-04-15 20:17:14 +0000] [22] [INFO] Using worker: gevent
[2019-04-15 20:17:14 +0000] [33] [INFO] Booting worker with pid: 33
[2019-04-15 20:17:14 +0000] [34] [INFO] Booting worker with pid: 34
[2019-04-15 20:17:14 +0000] [42] [INFO] Booting worker with pid: 42
[2019-04-15 20:17:14 +0000] [50] [INFO] Booting worker with pid: 50
[2019-04-15 20:17:15 +0000] [52] [INFO] Booting worker with pid: 52
[2019-04-15 20:17:15 +0000] [54] [INFO] Booting worker with pid: 54
[2019-04-15 20:17:15 +0000] [62] [INFO] Booting worker with pid: 62
@zmjjmz
zmjjmz / breakage
Created April 30, 2019 21:40
from_json nightmare
An error occurred while calling o4971.count.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 3 in stage 255.0 failed 4 times, most recent failure: Lost task 3.3 in stage 255.0 (TID 840, ip-172-32-98-36.ec2.internal, executor 1): java.lang.ClassCastException: java.lang.Boolean cannot be cast to java.lang.String
at org.apache.spark.sql.catalyst.json.JSONOptions$$anonfun$27.apply(JSONOptions.scala:84)
at scala.Option.map(Option.scala:146)
at org.apache.spark.sql.catalyst.json.JSONOptions.<init>(JSONOptions.scala:84)
at org.apache.spark.sql.catalyst.json.JSONOptions.<init>(JSONOptions.scala:43)
at org.apache.spark.sql.catalyst.expressions.JsonToStructs.parser$lzycompute(jsonExpressions.scala:555)
at org.apache.spark.sql.catalyst.expressions.JsonToStructs.parser(jsonExpressions.scala:552)
at org.apache.spark.sql.catalyst.expressions.JsonToStructs.nullSafeEval(jsonExpressions.scala:585)
at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:331)
@zmjjmz
zmjjmz / documented_example.py
Created July 11, 2019 22:02
TF keras export usage
import tensorflow as tf
# Create a tf.keras model.
print(tf.version.VERSION)
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(1, input_shape=[10]))
model.summary()
# Save the tf.keras model in the SavedModel format.
saved_to_path = tf.keras.experimental.export(
@zmjjmz
zmjjmz / keras.py
Created August 14, 2019 19:22
Vespa Keras weirdness
import keras
import numpy as np
input_l = keras.Input(shape=(1,), name='input')
layer_1 = keras.layers.Dense(1, activation='relu', name='layer_1')(input_l)
output_l = keras.layers.Dense(1, activation='linear', name='output')(layer_1)
model = keras.Model(inputs=[input_l], outputs=[output_l])
model.compile(loss='mean_absolute_error', optimizer='rmsprop')
x = np.array(np.arange(1, 100000))
@zmjjmz
zmjjmz / export_error
Created August 14, 2019 21:17
Vespa Keras tf experiment export_saved_model issues
{'error-code': 'INVALID_APPLICATION_PACKAGE',
'message': 'Invalid application package: default.default: Error loading '
'model: Could not import TensorFlow model from directory '
"'/opt/vespa/var/db/vespa/config_server/serverdb/tenants/default/sessions/175/.preprocessed/models/plike_test/tf114_export': "
"_output_shapes attribute of 'init_1' does not exist"}
@zmjjmz
zmjjmz / Error
Last active December 18, 2019 20:30
Dynamic Slices JAX
Traceback (most recent call last):
File "jax_models.py", line 232, in <module>
shuffle=True,
File "jax_models.py", line 181, in fit
voter_indices, target_indices, ratings, batch_size, batched_dataset_size)
File "/home/u1/zach/proj/dataplayground3/lib/python3.5/site-packages/jax/api.py", line 150, in f_jitted
out = xla.xla_call(flat_fun, *args_flat, device=device, backend=backend)
File "/home/u1/zach/proj/dataplayground3/lib/python3.5/site-packages/jax/core.py", line 592, in call_bind
outs = primitive.impl(f, *args, **params)
File "/home/u1/zach/proj/dataplayground3/lib/python3.5/site-packages/jax/interpreters/xla.py", line 400, in _xla_call_impl