Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def make_local_dataset(filename): | |
"""Make a TensorFlow Arrow Dataset that reads from a local CSV file.""" | |
# Read the local file and get a record batch iterator | |
batch_iter = read_and_process(filename) | |
# Create the Arrow Dataset as a stream from local iterator of record batches | |
ds = arrow_io.ArrowStreamDataset.from_record_batches( | |
batch_iter, | |
output_types=(tf.int64, tf.float64, tf.float64), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def make_remote_dataset(endpoint): | |
"""Make a TensorFlow Arrow Dataset that reads from a remote Arrow stream.""" | |
# Create the Arrow Dataset from a remote host serving a stream | |
ds = arrow_io.ArrowStreamDataset( | |
[endpoint], | |
columns=(0, 1, 2), | |
output_types=(tf.int64, tf.float64, tf.float64), | |
output_shapes=(tf.TensorShape([]), tf.TensorShape([]), tf.TensorShape([])), | |
batch_mode='auto') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def read_and_process(filename): | |
"""Read the given CSV file and yield processed Arrow batches.""" | |
# Read a CSV file into an Arrow Table with threading enabled and | |
# set block_size in bytes to break the file into chunks for granularity, | |
# which determines the number of batches in the resulting pyarrow.Table | |
opts = pyarrow.csv.ReadOptions(use_threads=True, block_size=4096) | |
table = pyarrow.csv.read_csv(filename, opts) | |
# Fit the feature transform |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow_io.arrow as arrow_io

# Build an Arrow-backed dataset from the pandas DataFrame `df`.
# NOTE(review): `df` is not defined in this snippet; it is expected to
# already exist in the enclosing scope -- confirm against the caller.
# NOTE(review): preserve_index=False presumably leaves the DataFrame index
# out of the dataset columns -- confirm against the tensorflow-io docs.
ds = arrow_io.ArrowDataset.from_pandas(
    df,
    batch_size=2,
    preserve_index=False)

# Make an iterator to the dataset
ds_iter = iter(ds)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd

# Draw 10 binary labels (each 0 or 1 with probability 0.5).
data = {'label': np.random.binomial(1, 0.5, 10)}

# Each feature is standard-normal noise shifted by 5 when the label is 1,
# so the feature values depend on the label.
data['x0'] = np.random.randn(10) + 5 * data['label']
data['x1'] = np.random.randn(10) + 5 * data['label']

# Assemble the columns (label, x0, x1) into a DataFrame and show a preview.
df = pd.DataFrame(data)
print(df.head())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow_io.arrow as arrow_io | |
from pyarrow.feather import write_feather | |
# Write the Pandas DataFrame to a Feather file | |
write_feather(df, '/path/to/df.feather') | |
# Create the dataset with one or more filenames | |
ds = arrow_io.ArrowFeatherDataset( | |
['/path/to/df.feather'], | |
columns=(0, 1, 2), |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow_io.arrow as arrow_io

# Build an Arrow stream dataset from the pandas DataFrame `df`.
# NOTE(review): `df` is not defined in this snippet; it is expected to
# already exist in the enclosing scope -- confirm against the caller.
# NOTE(review): preserve_index=False presumably leaves the DataFrame index
# out of the dataset columns -- confirm against the tensorflow-io docs.
ds = arrow_io.ArrowStreamDataset.from_pandas(
    df,
    batch_size=2,
    preserve_index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def model_fit(ds): | |
"""Create and fit a Keras logistic regression model.""" | |
# Build the Keras model | |
model = tf.keras.Sequential() | |
model.add(tf.keras.layers.Dense(1, input_shape=(2,), | |
activation='sigmoid')) | |
model.compile(optimizer='sgd', loss='mean_squared_error', | |
metrics=['accuracy']) |