Skip to content

Instantly share code, notes, and snippets.

View Muhammad4hmed's full-sized avatar
💻
Programming

Muhammad Ahmed Muhammad4hmed

💻
Programming
View GitHub Profile
from scipy.optimize import minimize
# alloof holds the 5-fold predictions for the whole dataset (to avoid data leakage)
def min_func(K):
ypredtrain = 0
for a in range(len(alloof)):
ypredtrain += K[a]*alloof[a]
https://www.kaggle.com/code/cdeotte/forward-selection-oof-ensemble-0-942-private/notebook
Finally, we have more than 1,000 features in all.
OK, let's get a little more technical:
Overview of solution
Preprocessing
1. Remove punctuation
2. Apply the Porter stemmer
import numpy as np
import pandas as pd
import os
from transformers import *
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import mean_squared_error, accuracy_score, roc_auc_score
import random
class Squeeze(nn.Module):
    """Module wrapper around ``Tensor.squeeze`` for use inside layer stacks.

    Args:
        dims: Dimension handed to ``squeeze`` on every forward pass.
            Defaults to ``-1`` (the last dimension).
    """

    def __init__(self, dims=-1):
        super().__init__()
        # Stored as a plain attribute; the module has no parameters.
        self.dims = dims

    def forward(self, x):
        """Return ``x.squeeze(self.dims)``."""
        squeezed = x.squeeze(self.dims)
        return squeezed
class AttentionHead(nn.Module):
def __init__(self, in_features, hidden_dim, num_targets):
def groupby(train, test, col, target='Price'):
    """Add per-group min/mean/max of ``target`` as new columns, in place.

    The statistics are computed on ``train`` only, grouped by ``col``, and
    then mapped onto both frames. Three columns are added to each frame:
    ``f'{col}_Min'``, ``f'{col}_Mean'`` and ``f'{col}_Max'``.

    Args:
        train: DataFrame containing both ``col`` and ``target``.
        test: DataFrame containing ``col``; it does not need ``target``.
        col: Name of the column to group by.
        target: Name of the column to aggregate. Defaults to ``'Price'``,
            which was previously hard-coded.

    Returns:
        None. Both ``train`` and ``test`` are modified in place.
    """
    stat_names = ('min', 'mean', 'max')
    stats = train.groupby(col)[target].agg(list(stat_names))

    for frame in (train, test):
        keys = frame[col]
        for stat in stat_names:
            # Values of ``col`` absent from ``train`` have no entry in
            # ``stats`` and therefore map to NaN.
            frame[f'{col}_{stat.capitalize()}'] = keys.map(stats[stat])
def extract_w_h(image_id_check):
    """Look up the stored dimensions of one image in the global ``test_df``.

    Args:
        image_id_check: Value compared against ``test_df.id`` to select the
            image's row.

    Returns:
        A ``(w, h)`` tuple taken from the ``'dim1'`` and ``'dim0'`` columns
        (in that order) of the first matching row.

    Raises:
        IndexError: If no row of ``test_df`` has a matching ``id``.
    """
    # Filter on the function's own argument. The previous body referenced
    # ``image_id``, which is not a parameter of this function.
    # NOTE(review): 'dim1' is read as width and 'dim0' as height; the
    # original author flagged this order as needing a check.
    w, h = test_df.loc[test_df.id == image_id_check, ['dim1', 'dim0']].values[0]
    return w, h
def extract_data(data_list, w, h):
labels = []
scores_list=[]
boxes_list = []
https://www.kaggle.com/sreevishnudamodaran/siim-mmdetection-cascadercnn-weight-bias/notebook
class LitModel(nn.Module):
def __init__(self):
super().__init__()
config = AutoConfig.from_pretrained(ROBERTA_PATH)
config.update({"output_hidden_states":True,
"hidden_dropout_prob": 0.0,
"layer_norm_eps": 1e-7})
self.roberta = AutoModel.from_pretrained(ROBERTA_PATH, config=config)
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
from transformers import (AutoModel,AutoModelForMaskedLM,
AutoTokenizer, LineByLineTextDataset,
DataCollatorForLanguageModeling,
Trainer, TrainingArguments)
# Load the training CSV; the path is relative to the current working directory.
train_data = pd.read_csv('../input/commonlitreadabilityprize/train.csv')