Skip to content

Instantly share code, notes, and snippets.

View rafbarr's full-sized avatar

Rafael Barreto rafbarr

  • San Francisco Bay Area
View GitHub Profile
@rafbarr
rafbarr / scale_by_min_max_per_key.py
Created May 28, 2020 21:07
Custom TFT scale_by_min_max_per_key
from functools import reduce
import apache_beam as beam
import numpy as np
import tensorflow as tf
import tensorflow_transform as tft
def transform_sparse_values(sp_tensor, trans_fun):
@rafbarr
rafbarr / _feature_hasher.pyx
Created October 2, 2016 09:58
Very efficient implementation of feature hashing for Python.
# distutils: language=c++
import collections
import operator
from cpython.bytes cimport PyBytes_AS_STRING, PyBytes_GET_SIZE
import cython
import numpy as np
import pandas as pd
from libc.stdint cimport uint8_t, uint32_t
#!/bin/bash
//usr/bin/env groovy -cp "$(dirname "$0")" "$0" $@; exit $?
@Grab(group = 'org.apache.avro', module = 'avro', version = '1.8.0')
import org.apache.avro.SchemaValidatorBuilder
def cli = new CliBuilder(stopAtNonOption: false)
cli._(longOpt: 'reference-schemas', args: 1, argName: 'dir', 'directory with the reference schemas')
cli._(longOpt: 'schemas-to-check', args: 1, argName: 'dir', 'directory with the schemas to check')
#!/bin/bash
//usr/bin/env groovy -cp "$(dirname "$0")" "$0" $@; exit $?
if (args.size() == 0) {
println 'missing root dir'
System.exit(1)
}
def linter = new AvroSchemaLinter()
try {
/** A node of a simple prefix tree over strings.
  *
  * Marked `final` so the compiler-generated `equals`, `hashCode` and `copy`
  * cannot be invalidated by a subclass.
  *
  * @param prefix the string held by this node (presumably the prefix shared by
  *               the strings below it -- confirm against the companion's `apply`)
  * @param leaves the child nodes of this node
  */
final case class SimplePrefixTree(prefix: String, leaves: Seq[SimplePrefixTree])
object SimplePrefixTree {
def apply(stringSet: Set[String]): SimplePrefixTree = {
val longestCommonPrefix = if (!stringSet.isEmpty) {
(stringSet.min, stringSet.max).zipped.takeWhile(v => v._1 == v._2).unzip._1.mkString
} else {
""
}