# Refresh the apt package index, then (only if that succeeds) upgrade installed packages.
sudo apt update && sudo apt upgrade
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np

# Example DataFrame with random target values
df = pd.DataFrame({
    'label': np.random.normal(size=1000),  # 1000 draws from a standard normal distribution
})

# Step 1: Bin the target values to create a frequency distribution.
# `bins=10` asks pd.cut for 10 equal-width intervals spanning the observed range.
df['label_bin'] = pd.cut(df['label'], bins=10)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
This model integrates the MoE concept within a Transformer architecture. Each token's
representation is processed by a subset of experts, determined by the gating mechanism.
This architecture allows for efficient and specialized handling of different aspects of the
data, aiming for the adaptability and efficiency noted in the Mixtral 8x7B model's design
philosophy. The model activates only a fraction of the available experts for each token,
significantly reducing the computational resources needed compared to activating all experts
for all tokens.
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# %% | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import scipy.signal | |
from gluonts.dataset.repository import get_dataset, dataset_names | |
from gluonts.dataset.util import to_pandas | |
from gluonts.dataset.common import ListDataset | |
from gluonts.torch import SimpleFeedForwardEstimator | |
from lightning.pytorch.callbacks.early_stopping import EarlyStopping |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Works on gluonts dev branch as of May 30th, 2023 | |
# Assumes "m5-forecasting-accuracy" folder with data next to the script | |
# Data is obtained from https://www.kaggle.com/c/m5-forecasting-accuracy | |
import pandas as pd | |
from pathlib import Path | |
from gluonts.dataset.pandas import PandasDataset | |
# Load data from csv files |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from os import path | |
from bokeh.models import Button, Div | |
from bokeh.layouts import column | |
from bokeh.document import without_document_lock | |
from bokeh.io import curdoc | |
from zmq_subprocess import ZmqSubProcessClient | |
# Page widgets: a button labelled "ok" and a Div created with no arguments.
ok_button = Button(label="ok")
div = Div()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import Trainer, TrainingArguments | |
training_args = TrainingArguments( | |
output_dir="./logs/model_name", | |
logging_dir="./logs/runs", | |
overwrite_output_dir=True, | |
do_train=True, | |
per_device_train_batch_size=32, | |
num_train_epochs=1, | |
evaluate_during_training=True, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from multimodal_transformers.model import AutoModelWithTabular, TabularConfig | |
from transformers import AutoConfig | |
# Number of distinct classes, taken from the dataset's label array.
# The previous form `np.unique(torch_dataset, labels)` passed an undefined name
# `labels` as a second positional argument instead of reading the attribute.
# NOTE(review): assumes `torch_dataset` exposes `.labels` alongside the
# `.cat_feats` / `.numerical_feats` attributes used below — confirm.
num_labels = len(np.unique(torch_dataset.labels))
config = AutoConfig.from_pretrained('bert-base-uncased')
tabular_config = TabularConfig( | |
num_labels=num_labels, | |
cat_feat_dim=torch_dataset.cat_feats.shape[1], | |
numerical_feat_dim=torch_dataset.numerical_feats.shape[1], | |
combine_feat_method='weighted_feature_sum_on_transformer_cat_and_numerical_feats', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from multimodal_transformers.data import load_data | |
from transformers import AutoTokenizer | |
# Load the reviews table; the CSV is looked up relative to the current working directory.
data_df = pd.read_csv('Womens Clothing E-Commerce Reviews.csv')
# Column names, grouped by how each column is to be treated.
text_cols = ['Title', 'Review Text']
# The label col is expected to contain integers from 0 to N_classes - 1
label_col = 'Recommended IND'
categorical_cols = ['Clothing ID', 'Division Name', 'Department Name', 'Class Name']
numerical_cols = ['Rating', 'Age', 'Positive Feedback Count']
NewerOlder