Skip to content

Instantly share code, notes, and snippets.

@Bhavesh-Joshi
Last active January 12, 2021 07:32
Show Gist options
  • Save Bhavesh-Joshi/4db376e4aa6a2ef808c4e54618acc987 to your computer and use it in GitHub Desktop.
VQA_Ludwig
from ludwig.api import LudwigModel
import logging
import pandas as pd
# One-row sample dataset. The column names ('Path', 'Question', 'Answer')
# match the 'column' entries of the features declared in `config`.
data_dict = {
    'Path': ['/data/Antenna.jpg'],
    'Question': ['is antenna visible?'],
    'Answer': ['No'],
}
# Column-oriented dict -> DataFrame (one column per key, one row per list item).
data = pd.DataFrame(data_dict)
print(data)
# ---------------------------------------------------------------------------
# Model configuration handed to LudwigModel further down.
# It declares two inputs (an image read from column 'Path' and a text read
# from column 'Question'), a 'concat' combiner, and one text output
# ('Answer'). The sections are built separately and assembled into `config`
# at the end; key order is kept the same as in a single flat literal.
# ---------------------------------------------------------------------------

# Input feature: image located by the 'Path' column.
_image_input = {
    'column': 'Path',
    'encoder': 'stacked_cnn',
    'level': 'word',
    'name': 'Path',
    'preprocessing': {},
    'proc_column': 'Path_mZFLky',
    'tied': None,
    'type': 'image',
}

# Input feature: question text from the 'Question' column.
_question_input = {
    'column': 'Question',
    'encoder': 'stacked_cnn',
    'level': 'word',
    'name': 'Question',
    'proc_column': 'Question_mZFLky',
    'tied': None,
    'type': 'text',
}

# Loss settings attached to the 'Answer' output feature.
_answer_loss = {
    'class_similarities_temperature': 0,
    'class_weights': 1,
    'confidence_penalty': 0,
    'distortion': 1,
    'labels_smoothing': 0,
    'negative_samples': 0,
    'robust_lambda': 0,
    'sampler': None,
    'type': 'softmax_cross_entropy',
    'unique': False,
    'weight': 1,
}

# Output feature: answer text produced by a 'generator' decoder.
_answer_output = {
    'cell_type': 'lstm',
    'column': 'Answer',
    'decoder': 'generator',
    'dependencies': [],
    'level': 'word',
    'loss': _answer_loss,
    'name': 'Answer',
    'proc_column': 'Answer_mZFLky',
    'reduce_dependencies': 'sum',
    'reduce_input': 'sum',
    'type': 'text',
}

# Preprocessing settings keyed by feature type, plus the dataset-split
# options ('force_split', 'split_probabilities', 'stratify').
_preprocessing = {
    'audio': {
        'audio_feature': {'type': 'raw'},
        'audio_file_length_limit_in_s': 7.5,
        'in_memory': True,
        'missing_value_strategy': 'backfill',
        'norm': None,
        'padding_value': 0,
    },
    'bag': {
        'fill_value': '<UNK>',
        'lowercase': False,
        'missing_value_strategy': 'fill_with_const',
        'most_common': 10000,
        'tokenizer': 'space',
    },
    'binary': {
        'fill_value': 0,
        'missing_value_strategy': 'fill_with_const',
    },
    'category': {
        'fill_value': '<UNK>',
        'lowercase': False,
        'missing_value_strategy': 'fill_with_const',
        'most_common': 10000,
    },
    'date': {
        'datetime_format': None,
        'fill_value': '',
        'missing_value_strategy': 'fill_with_const',
    },
    'force_split': False,
    'h3': {
        'fill_value': 576495936675512319,
        'missing_value_strategy': 'fill_with_const',
    },
    'image': {
        'in_memory': True,
        'missing_value_strategy': 'backfill',
        'num_processes': 1,
        'resize_method': 'interpolate',
        'scaling': 'pixel_normalization',
    },
    'numerical': {
        'fill_value': 0,
        'missing_value_strategy': 'fill_with_const',
        'normalization': None,
    },
    'sequence': {
        'fill_value': '<UNK>',
        'lowercase': False,
        'missing_value_strategy': 'fill_with_const',
        'most_common': 20000,
        'padding': 'right',
        'padding_symbol': '<PAD>',
        'sequence_length_limit': 256,
        'tokenizer': 'space',
        'unknown_symbol': '<UNK>',
        'vocab_file': None,
    },
    'set': {
        'fill_value': '<UNK>',
        'lowercase': False,
        'missing_value_strategy': 'fill_with_const',
        'most_common': 10000,
        'tokenizer': 'space',
    },
    'split_probabilities': (0.7, 0.1, 0.2),
    'stratify': None,
    'text': {
        'char_most_common': 70,
        'char_sequence_length_limit': 1024,
        'char_tokenizer': 'characters',
        'char_vocab_file': None,
        'fill_value': '<UNK>',
        'lowercase': True,
        'missing_value_strategy': 'fill_with_const',
        'padding': 'right',
        'padding_symbol': '<PAD>',
        'pretrained_model_name_or_path': None,
        'unknown_symbol': '<UNK>',
        'word_most_common': 20000,
        'word_sequence_length_limit': 256,
        'word_tokenizer': 'space_punct',
        'word_vocab_file': None,
    },
    'timeseries': {
        'fill_value': '',
        'missing_value_strategy': 'fill_with_const',
        'padding': 'right',
        'padding_value': 0,
        'timeseries_length_limit': 256,
        'tokenizer': 'space',
    },
    'vector': {
        'fill_value': '',
        'missing_value_strategy': 'fill_with_const',
    },
}

# Optimizer hyper-parameters nested under the training section.
_optimizer = {
    'beta_1': 0.9,
    'beta_2': 0.999,
    'epsilon': 1e-08,
    'type': 'adam',
}

# Training-loop settings (a single epoch, Adam optimizer).
_training = {
    'batch_size': 128,
    'bucketing_field': None,
    'decay': False,
    'decay_rate': 0.96,
    'decay_steps': 10000,
    'early_stop': 5,
    'epochs': 1,
    'eval_batch_size': 0,
    'gradient_clipping': None,
    'increase_batch_size_on_plateau': 0,
    'increase_batch_size_on_plateau_max': 512,
    'increase_batch_size_on_plateau_patience': 5,
    'increase_batch_size_on_plateau_rate': 2,
    'learning_rate': 0.001,
    'learning_rate_warmup_epochs': 1,
    'optimizer': _optimizer,
    'reduce_learning_rate_on_plateau': 0,
    'reduce_learning_rate_on_plateau_patience': 5,
    'reduce_learning_rate_on_plateau_rate': 0.5,
    'regularization_lambda': 0,
    'regularizer': 'l2',
    'staircase': False,
    'validation_field': 'combined',
    'validation_metric': 'loss',
}

# Final assembly, in the order: combiner, inputs, outputs, preprocessing,
# training.
config = {
    'combiner': {'type': 'concat'},
    'input_features': [_image_input, _question_input],
    'output_features': [_answer_output],
    'preprocessing': _preprocessing,
    'training': _training,
}
# Instantiate the Ludwig model from the config above, logging at INFO level.
model = LudwigModel(config, logging_level=logging.INFO)

# Train on the in-memory DataFrame; the run is named "demo" and its
# output directory is /data/model.
model.train(
    training_set=data,
    model_name="demo",
    output_directory="/data/model",
)

# Export the trained model to /data/tensor.
# NOTE(review): presumably a TensorFlow SavedModel export - confirm against
# the Ludwig API docs for the installed version.
model.save_savedmodel('/data/tensor')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment