BryanCutler / tf_arrow_blog_p11.py
TensorFlow Arrow Blog Part 11 - Model Training Remote Dataset
import tensorflow as tf
import tensorflow_io.arrow as arrow_io

def make_remote_dataset(endpoint):
  """Make a TensorFlow Arrow Dataset that reads from a remote Arrow stream."""
  # Create the Arrow Dataset from a remote host serving a stream
  ds = arrow_io.ArrowStreamDataset(
      [endpoint],
      columns=(0, 1, 2),
      output_types=(tf.int64, tf.float64, tf.float64),
      batch_mode='auto')
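  # The gist preview cuts off here; the lines below are an assumed completion,
  # not necessarily the blog's exact code: stack the two feature columns into
  # a single tensor alongside the label column, then return the dataset.
  ds = ds.map(lambda label, x0, x1: (tf.stack([x0, x1], axis=1), label))
  return ds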
BryanCutler / tf_arrow_blog_pt10.py
TensorFlow Arrow Blog Part 10 - Serve CSV Data
import socket

def serve_csv_data(ip_addr, port_num, directory):
  """
  Create a socket and serve Arrow record batches as a stream read from the
  given directory containing CSV files.
  """
  # Create the socket
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  sock.bind((ip_addr, port_num))
  sock.listen(1)
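  # The preview stops after the socket setup. Below is a hedged sketch of how
  # the rest of the function might stream batches to a connected client,
  # reusing read_and_process_dir from the Part 9 snippet. This loop is an
  # assumption, not the blog's exact code, and assumes "import pyarrow".
  conn, _ = sock.accept()
  outfile = conn.makefile(mode='wb')
  writer = None
  try:
    for batch in read_and_process_dir(directory):
      # Lazily create the stream writer once the schema is known
      if writer is None:
        writer = pyarrow.RecordBatchStreamWriter(outfile, batch.schema)
      writer.write_batch(batch)
  finally:
    if writer is not None:
      writer.close()
    outfile.close()
    conn.close()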
BryanCutler / tf_arrow_blog_pt9.py
TensorFlow Arrow Blog Part 9 - Read and Process Directory
import os

def read_and_process_dir(directory):
  """Read a directory of CSV files and yield processed Arrow batches."""
  for f in os.listdir(directory):
    if f.endswith(".csv"):
      filename = os.path.join(directory, f)
      for batch in read_and_process(filename):
        yield batch
BryanCutler / tf_arrow_blog_pt8.py
TensorFlow Arrow Blog Part 8 - Run Training Local
ds = make_local_dataset(filename)
model = model_fit(ds)
print("Fit model with weights: {}".format(model.get_weights()))
# Fit model with weights:
# [array([[0.7793554 ], [0.61216295]], dtype=float32),
# array([0.03328196], dtype=float32)]
BryanCutler / tf_arrow_blog_pt7.py
TensorFlow Arrow Blog Part 7 - Model Training Local Dataset
import tensorflow as tf
import tensorflow_io.arrow as arrow_io

def make_local_dataset(filename):
  """Make a TensorFlow Arrow Dataset that reads from a local CSV file."""
  # Read the local file and get a record batch iterator
  batch_iter = read_and_process(filename)
  # Create the Arrow Dataset as a stream from local iterator of record batches
  ds = arrow_io.ArrowStreamDataset.from_record_batches(
      batch_iter,
      output_types=(tf.int64, tf.float64, tf.float64),
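      # The preview cuts off mid-call; the argument and lines below are an
      # assumed completion that mirrors the remote dataset in the Part 11
      # snippet rather than the blog's exact code.
      batch_mode='auto')

  # Stack the two feature columns into a single tensor next to the label
  ds = ds.map(lambda label, x0, x1: (tf.stack([x0, x1], axis=1), label))
  return ds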
BryanCutler / tf_arrow_blog_pt6.py
TensorFlow Arrow Blog Part 6 - Read and Process CSV File
import pyarrow
import pyarrow.csv

def read_and_process(filename):
  """Read the given CSV file and yield processed Arrow batches."""
  # Read a CSV file into an Arrow Table with threading enabled; block_size
  # (in bytes) breaks the file into chunks, which determines the number of
  # record batches in the resulting pyarrow.Table
  opts = pyarrow.csv.ReadOptions(use_threads=True, block_size=4096)
  table = pyarrow.csv.read_csv(filename, opts)
  # Fit the feature transform
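  # The preview ends at the comment above. Below is a hedged sketch of what
  # the processing might look like: fit a scaler over the feature columns,
  # then yield each record batch with the features scaled. The column names
  # 'label', 'x0', 'x1' and the use of scikit-learn's StandardScaler
  # (requires "from sklearn.preprocessing import StandardScaler") are
  # assumptions for illustration.
  df = table.to_pandas()
  scaler = StandardScaler().fit(df[['x0', 'x1']])

  # Iterate over each batch in the Table, scale the features, and yield
  for batch in table.to_batches():
    batch_df = batch.to_pandas()
    batch_df[['x0', 'x1']] = scaler.transform(batch_df[['x0', 'x1']])
    yield pyarrow.RecordBatch.from_pandas(batch_df, preserve_index=False)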
BryanCutler / tf_arrow_blog_pt5.py
TensorFlow Arrow Blog Part 5 - Model Definition
import tensorflow as tf

def model_fit(ds):
  """Create and fit a Keras logistic regression model."""
  # Build the Keras model
  model = tf.keras.Sequential()
  model.add(tf.keras.layers.Dense(1, input_shape=(2,),
                                  activation='sigmoid'))
  model.compile(optimizer='sgd', loss='mean_squared_error',
                metrics=['accuracy'])
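  # Assumed continuation (the preview stops at compile): train on the Arrow
  # dataset and return the fitted model; the epoch count is a placeholder.
  model.fit(ds, epochs=3)
  return model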
BryanCutler / tf_arrow_blog_pt4.py
TensorFlow Arrow Blog Part 4 - ArrowStreamDataset
import tensorflow_io.arrow as arrow_io

# Create an Arrow stream Dataset directly from the pandas DataFrame `df`
# defined earlier in the blog
ds = arrow_io.ArrowStreamDataset.from_pandas(
    df,
    batch_size=2,
    preserve_index=False)
BryanCutler / tf_arrow_blog_pt3.py
TensorFlow Arrow Blog Part 3 - ArrowFeatherDataset
import tensorflow_io.arrow as arrow_io
from pyarrow.feather import write_feather

# Write the Pandas DataFrame to a Feather file
write_feather(df, '/path/to/df.feather')

# Create the dataset with one or more filenames
ds = arrow_io.ArrowFeatherDataset(
    ['/path/to/df.feather'],
    columns=(0, 1, 2),
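    # The preview cuts off mid-call; this closing argument is an assumed
    # completion (and assumes "import tensorflow as tf"), matching the
    # dtypes used in the other snippets.
    output_types=(tf.int64, tf.float64, tf.float64))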
BryanCutler / tf_arrow_blog_pt2.py
TensorFlow Arrow Blog Part 2 - ArrowDataset
import tensorflow_io.arrow as arrow_io

# Create an Arrow Dataset from the pandas DataFrame `df` defined earlier
ds = arrow_io.ArrowDataset.from_pandas(
    df,
    batch_size=2,
    preserve_index=False)

# Make an iterator to the dataset
ds_iter = iter(ds)
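# Assumed next step (not shown in the preview): pull the first batch of rows
row = next(ds_iter)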