This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from scipy.optimize import minimize | |
# alloof contains the 5-fold (out-of-fold) predictions on the whole dataset (avoids data leakage)
def min_func(K): | |
ypredtrain = 0 | |
for a in range(len(alloof)): | |
ypredtrain += K[a]*alloof[a] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
https://www.kaggle.com/code/cdeotte/forward-selection-oof-ensemble-0-942-private/notebook |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Finally, we have more than 1000 features in all.
OK, let's get a little bit more technical:
Overview of solution
Preprocessing
1. Remove punctuation
2. Apply the Porter stemmer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import os | |
from transformers import * | |
from sklearn.model_selection import KFold, StratifiedKFold | |
from sklearn.metrics import mean_squared_error, accuracy_score, roc_auc_score | |
import random |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Squeeze(nn.Module):
    """Module wrapper around ``Tensor.squeeze`` so it can be used as a layer.

    Args:
        dims: Dimension argument forwarded to ``x.squeeze``. Defaults to
            ``-1`` (the last dimension).
    """

    def __init__(self, dims=-1):
        super().__init__()
        self.dims = dims

    def forward(self, x):
        """Return ``x.squeeze(self.dims)``."""
        return x.squeeze(self.dims)
class AttentionHead(nn.Module): | |
def __init__(self, in_features, hidden_dim, num_targets): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def groupby(train, test, col, target="Price"):
    """Add per-group min/mean/max of ``target`` as new columns on both frames.

    The statistics are computed on ``train`` only (grouped by ``col``) and
    then mapped onto ``train[col]`` and ``test[col]``. The new columns are
    named ``{col}_Min``, ``{col}_Mean`` and ``{col}_Max``. Both frames are
    modified in place; nothing is returned.

    Args:
        train: DataFrame containing ``col`` and ``target``.
        test: DataFrame containing ``col``.
        col: Name of the column to group by.
        target: Name of the column to aggregate. Defaults to ``'Price'``.
    """
    stats = train.groupby(col)[target].agg(["min", "mean", "max"])
    for stat, suffix in (("min", "Min"), ("mean", "Mean"), ("max", "Max")):
        new_col = f"{col}_{suffix}"
        # Values of ``col`` that never occur in ``train`` have no group
        # statistic and therefore map to NaN.
        train[new_col] = train[col].map(stats[stat])
        test[new_col] = test[col].map(stats[stat])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def extract_w_h(image_id_check):
    """Return ``(w, h)`` for the first ``test_df`` row whose ``id`` matches.

    ``w`` is read from the ``dim1`` column and ``h`` from ``dim0``.
    NOTE(review): the original comment asked to check the width/height
    order -- confirm that ``dim1`` is width and ``dim0`` is height.

    Args:
        image_id_check: Value compared against ``test_df.id``.

    Returns:
        Tuple ``(w, h)`` taken from the first matching row.

    Raises:
        IndexError: If no row of ``test_df`` matches ``image_id_check``.
    """
    # Filter on the function's own argument; the previous body referenced
    # ``image_id``, which is not a parameter of this function.
    matches = test_df.loc[test_df.id == image_id_check, ["dim1", "dim0"]]
    w, h = matches.values[0]
    return w, h
def extract_data(data_list, w, h): | |
labels = [] | |
scores_list=[] | |
boxes_list = [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
https://www.kaggle.com/sreevishnudamodaran/siim-mmdetection-cascadercnn-weight-bias/notebook |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class LitModel(nn.Module): | |
def __init__(self): | |
super().__init__() | |
config = AutoConfig.from_pretrained(ROBERTA_PATH) | |
config.update({"output_hidden_states":True, | |
"hidden_dropout_prob": 0.0, | |
"layer_norm_eps": 1e-7}) | |
self.roberta = AutoModel.from_pretrained(ROBERTA_PATH, config=config) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import warnings | |
warnings.filterwarnings('ignore') | |
from transformers import (AutoModel,AutoModelForMaskedLM, | |
AutoTokenizer, LineByLineTextDataset, | |
DataCollatorForLanguageModeling, | |
Trainer, TrainingArguments) | |
train_data = pd.read_csv('../input/commonlitreadabilityprize/train.csv') |
NewerOlder