Skip to content

Instantly share code, notes, and snippets.

View codeKgu's full-sized avatar

Ken Gu codeKgu

View GitHub Profile
@codeKgu
codeKgu / train.py
Created October 23, 2020 15:39
Tutorial for multimodal_transformers
from transformers import Trainer, TrainingArguments
training_args = TrainingArguments(
output_dir="./logs/model_name",
logging_dir="./logs/runs",
overwrite_output_dir=True,
do_train=True,
per_device_train_batch_size=32,
num_train_epochs=1,
evaluate_during_training=True,
@codeKgu
codeKgu / model_loading.py
Created October 23, 2020 15:38
Tutorial for multimodal_transformers
from multimodal_transformers.model import AutoModelWithTabular, TabularConfig
from transformers import AutoConfig
# Number of target classes: count of distinct values in the dataset's labels.
# `labels` is read as an attribute of torch_dataset, like the `cat_feats` and
# `numerical_feats` attributes used for the tabular config.
num_labels = len(np.unique(torch_dataset.labels))
# Transformer configuration loaded for the 'bert-base-uncased' checkpoint.
config = AutoConfig.from_pretrained('bert-base-uncased')
tabular_config = TabularConfig(
num_labels=num_labels,
cat_feat_dim=torch_dataset.cat_feats.shape[1],
numerical_feat_dim=torch_dataset.numerical_feats.shape[1],
combine_feat_method='weighted_feature_sum_on_transformer_cat_and_numerical_feats',
@codeKgu
codeKgu / data_loading.py
Last active August 14, 2023 17:22
Tutorial for multimodal_transformers
import pandas as pd
from multimodal_transformers.data import load_data
from transformers import AutoTokenizer
# Read the reviews CSV into a DataFrame.
data_df = pd.read_csv('Womens Clothing E-Commerce Reviews.csv')

# Column names grouped by role (text / label / categorical / numerical).
# Presumably these are handed to `load_data`; the call is not visible here.
text_cols = ['Title', 'Review Text']
# The label col is expected to contain integers from 0 to N_classes - 1
label_col = 'Recommended IND'
categorical_cols = ['Clothing ID', 'Division Name', 'Department Name', 'Class Name']
numerical_cols = ['Rating', 'Age', 'Positive Feedback Count']
@codeKgu
codeKgu / dataset.py
Created July 19, 2020 15:28
snippets for TextGCN blog
import scipy.sparse as sp
import numpy as np
def init_node_feats(self, type, device):
if type == 'one_hot_init':
num_nodes = self.graph.shape[0]
identity = sp.identity(num_nodes)
ind0, ind1, values = sp.find(identity)
inds = np.stack((ind0, ind1), axis=0)
self.node_feats = torch.sparse_coo_tensor(inds, values, device=device, dtype=torch.float)