Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Error Log file - Ludwig Training
2020-12-20 17:37:16.091097: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
███████████████████████
█ █ █ █ ▜█ █ █ █ █ █
█ █ █ █ █ █ █ █ █ █ ███
█ █ █ █ █ █ █ █ █ ▌ █
█ █████ █ █ █ █ █ █ █ █
█ █ ▟█ █ █ █
███████████████████████
ludwig v0.3.1 - Train
2020-12-20 17:37:17.142341: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2020-12-20 17:37:17.176154: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-20 17:37:17.176730: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:00:04.0 name: Tesla T4 computeCapability: 7.5
coreClock: 1.59GHz coreCount: 40 deviceMemorySize: 14.73GiB deviceMemoryBandwidth: 298.08GiB/s
2020-12-20 17:37:17.176779: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2020-12-20 17:37:17.181117: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-12-20 17:37:17.184000: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2020-12-20 17:37:17.184829: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2020-12-20 17:37:17.188329: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2020-12-20 17:37:17.189667: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2020-12-20 17:37:17.195600: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2020-12-20 17:37:17.195717: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-20 17:37:17.196269: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-20 17:37:17.196773: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
Experiment name: experiment
Model name: run
Output directory: results/experiment_run_1
ludwig_version: '0.3.1'
command: ('/usr/local/bin/ludwig train --dataset hootsuite_titles.csv --config '
'config.yaml')
random_seed: 42
dataset: 'hootsuite_titles.csv'
data_format: 'csv'
config: { 'combiner': {'type': 'concat'},
'input_features': [ { 'encoder': 't5',
'level': 'word',
'name': 'Original_Title',
'pretrained_model_name_or_path': 't5-small',
'reduce_output': None,
'tied': None,
'type': 'text'},
{ 'encoder': 't5',
'level': 'word',
'name': 'Keyword',
'pretrained_model_name_or_path': 't5-small',
'reduce_output': None,
'tied': None,
'tied_weights': 'Original_Title',
'type': 'text'}],
'output_features': [ { 'decoder': 'generator',
'dependencies': [],
'level': 'word',
'loss': { 'class_similarities_temperature': 0,
'class_weights': 1,
'confidence_penalty': 0,
'distortion': 1,
'labels_smoothing': 0,
'negative_samples': 0,
'robust_lambda': 0,
'sampler': None,
'type': 'softmax_cross_entropy',
'unique': False,
'weight': 1},
'name': 'Optimized_Title',
'reduce_dependencies': 'sum',
'reduce_input': 'sum',
'type': 'sequence'}],
'preprocessing': { 'audio': { 'audio_feature': {'type': 'raw'},
'audio_file_length_limit_in_s': 7.5,
'in_memory': True,
'missing_value_strategy': 'backfill',
'norm': None,
'padding_value': 0},
'bag': { 'fill_value': '<UNK>',
'lowercase': False,
'missing_value_strategy': 'fill_with_const',
'most_common': 10000,
'tokenizer': 'space'},
'binary': { 'fill_value': 0,
'missing_value_strategy': 'fill_with_const'},
'category': { 'fill_value': '<UNK>',
'lowercase': False,
'missing_value_strategy': 'fill_with_const',
'most_common': 10000},
'date': { 'datetime_format': None,
'fill_value': '',
'missing_value_strategy': 'fill_with_const'},
'force_split': False,
'h3': { 'fill_value': 576495936675512319,
'missing_value_strategy': 'fill_with_const'},
'image': { 'in_memory': True,
'missing_value_strategy': 'backfill',
'num_processes': 1,
'resize_method': 'interpolate',
'scaling': 'pixel_normalization'},
'numerical': { 'fill_value': 0,
'missing_value_strategy': 'fill_with_const',
'normalization': None},
'sequence': { 'fill_value': '<UNK>',
'lowercase': False,
'missing_value_strategy': 'fill_with_const',
'most_common': 20000,
'padding': 'right',
'padding_symbol': '<PAD>',
'sequence_length_limit': 256,
'tokenizer': 'space',
'unknown_symbol': '<UNK>',
'vocab_file': None},
'set': { 'fill_value': '<UNK>',
'lowercase': False,
'missing_value_strategy': 'fill_with_const',
'most_common': 10000,
'tokenizer': 'space'},
'split_probabilities': (0.7, 0.1, 0.2),
'stratify': None,
'text': { 'char_most_common': 70,
'char_sequence_length_limit': 1024,
'char_tokenizer': 'characters',
'char_vocab_file': None,
'fill_value': '<UNK>',
'lowercase': True,
'missing_value_strategy': 'fill_with_const',
'padding': 'right',
'padding_symbol': '<PAD>',
'pretrained_model_name_or_path': None,
'unknown_symbol': '<UNK>',
'word_most_common': 20000,
'word_sequence_length_limit': 256,
'word_tokenizer': 'space_punct',
'word_vocab_file': None},
'timeseries': { 'fill_value': '',
'missing_value_strategy': 'fill_with_const',
'padding': 'right',
'padding_value': 0,
'timeseries_length_limit': 256,
'tokenizer': 'space'},
'vector': { 'fill_value': '',
'missing_value_strategy': 'fill_with_const'}},
'training': { 'batch_size': 128,
'bucketing_field': None,
'decay': False,
'decay_rate': 0.96,
'decay_steps': 10000,
'early_stop': 5,
'epochs': 100,
'eval_batch_size': 0,
'gradient_clipping': None,
'increase_batch_size_on_plateau': 0,
'increase_batch_size_on_plateau_max': 512,
'increase_batch_size_on_plateau_patience': 5,
'increase_batch_size_on_plateau_rate': 2,
'learning_rate': 0.001,
'learning_rate_warmup_epochs': 1,
'optimizer': { 'beta_1': 0.9,
'beta_2': 0.999,
'epsilon': 1e-08,
'type': 'adam'},
'reduce_learning_rate_on_plateau': 0,
'reduce_learning_rate_on_plateau_patience': 5,
'reduce_learning_rate_on_plateau_rate': 0.5,
'regularization_lambda': 0,
'regularizer': 'l2',
'staircase': False,
'validation_field': 'combined',
'validation_metric': 'loss'}}
tf_version: '2.3.1'
Using full raw csv, no hdf5 and json file with the same name have been found
Building dataset (it may take a while)
Writing preprocessed dataset cache
Writing train set metadata
Training set: 692
Validation set: 86
Test set: 196
2020-12-20 17:37:22.112612: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2020-12-20 17:37:22.117970: I tensorflow/core/platform/profile_utils/cpu_utils.cc:104] CPU Frequency: 2200000000 Hz
2020-12-20 17:37:22.118161: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x8495dc0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2020-12-20 17:37:22.118191: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version
2020-12-20 17:37:22.248167: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-20 17:37:22.248841: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x8495f80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-12-20 17:37:22.248878: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Tesla T4, Compute Capability 7.5
2020-12-20 17:37:22.249093: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-20 17:37:22.249635: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties:
pciBusID: 0000:00:04.0 name: Tesla T4 computeCapability: 7.5
coreClock: 1.59GHz coreCount: 40 deviceMemorySize: 14.73GiB deviceMemoryBandwidth: 298.08GiB/s
2020-12-20 17:37:22.249707: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2020-12-20 17:37:22.249750: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
2020-12-20 17:37:22.249777: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcufft.so.10
2020-12-20 17:37:22.249805: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcurand.so.10
2020-12-20 17:37:22.249831: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusolver.so.10
2020-12-20 17:37:22.249855: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcusparse.so.10
2020-12-20 17:37:22.249878: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudnn.so.7
2020-12-20 17:37:22.249965: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-20 17:37:22.250553: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-20 17:37:22.251036: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1858] Adding visible gpu devices: 0
2020-12-20 17:37:22.251109: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1
2020-12-20 17:37:22.956132: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1257] Device interconnect StreamExecutor with strength 1 edge matrix:
2020-12-20 17:37:22.956192: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1263] 0
2020-12-20 17:37:22.956204: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1276] 0: N
2020-12-20 17:37:22.956403: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-20 17:37:22.956985: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:982] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2020-12-20 17:37:22.957515: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1402] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 13936 MB memory) -> physical GPU (device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5)
Downloading: 100% 242M/242M [00:08<00:00, 27.6MB/s]
2020-12-20 17:37:32.305237: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
2020-12-20 17:37:32.449536: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10
All model checkpoint layers were used when initializing TFT5Model.
All the layers of TFT5Model were initialized from the model checkpoint at t5-small.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5Model for predictions without further training.
All model checkpoint layers were used when initializing TFT5Model.
All the layers of TFT5Model were initialized from the model checkpoint at t5-small.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5Model for predictions without further training.
╒══════════╕
│ TRAINING │
╘══════════╛
Epoch 1
Training: 0% 0/6 [00:00<?, ?it/s]Traceback (most recent call last):
File "/usr/local/bin/ludwig", line 8, in <module>
sys.exit(main())
File "/usr/local/lib/python3.6/dist-packages/ludwig/cli.py", line 146, in main
CLI()
File "/usr/local/lib/python3.6/dist-packages/ludwig/cli.py", line 72, in __init__
getattr(self, args.command)()
File "/usr/local/lib/python3.6/dist-packages/ludwig/cli.py", line 77, in train
train.cli(sys.argv[2:])
File "/usr/local/lib/python3.6/dist-packages/ludwig/train.py", line 412, in cli
train_cli(**vars(args))
File "/usr/local/lib/python3.6/dist-packages/ludwig/train.py", line 197, in train_cli
debug=debug,
File "/usr/local/lib/python3.6/dist-packages/ludwig/api.py", line 469, in train
save_path=model_dir,
File "/usr/local/lib/python3.6/dist-packages/ludwig/models/trainer.py", line 552, in train
self.regularization_lambda
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 780, in __call__
result = self._call(*args, **kwds)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 823, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 697, in _initialize
*args, **kwds))
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 2855, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 3213, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 3075, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py", line 986, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/def_function.py", line 600, in wrapped_fn
return weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/function.py", line 3735, in bound_method_wrapper
return wrapped_fn(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/func_graph.py", line 973, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/ludwig/models/ecd.py:176 train_step *
model_outputs = self((inputs, targets), training=True)
/usr/local/lib/python3.6/dist-packages/ludwig/models/ecd.py:101 call *
encoder_output = encoder(input_values, training=training,
/usr/local/lib/python3.6/dist-packages/ludwig/features/text_feature.py:242 call *
encoder_output = self.encoder_obj(
/usr/local/lib/python3.6/dist-packages/ludwig/encoders/text_encoders.py:640 call *
transformer_outputs = self.transformer(
/usr/local/lib/python3.6/dist-packages/transformers/models/t5/modeling_tf_t5.py:1095 call *
inputs = input_processing(
/usr/local/lib/python3.6/dist-packages/transformers/modeling_tf_utils.py:349 input_processing *
raise ValueError(
ValueError: The following keyword arguments are not supported by this model: ['token_type_ids'].
Training: 0% 0/6 [00:01<?, ?it/s]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment