This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re, string, unicodedata | |
import nltk | |
import contractions | |
import inflect | |
from nltk import word_tokenize, sent_tokenize | |
from nltk.corpus import stopwords | |
from nltk.stem import LancasterStemmer, WordNetLemmatizer | |
def clean_text(s): | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.feature_selection import chi2 | |
from sklearn.feature_selection import f_classif | |
from sklearn.feature_selection import f_regression | |
from sklearn.feature_selection import mutual_info_classif | |
from sklearn.feature_selection import mutual_info_regression | |
from sklearn.feature_selection import SelectKBest | |
from sklearn.feature_selection import SelectPercentile | |
class UnivariateFeatureSelction: | |
def __init__(self, n_features, problem_type, scoring): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_date_featues(df, col):
    """Add calendar feature columns derived from the date column ``col``.

    ``df[col]`` is parsed once with ``pd.to_datetime`` and the following
    columns are written to ``df`` in place: ``Year``, ``Month``, ``Day``,
    ``Dayofweek``, ``DayOfyear``, ``Week``, ``Quarter``, ``Is_month_start``
    and ``Is_month_end``.

    Args:
        df: DataFrame to modify in place.
        col: Name of the column holding values parseable by
            ``pd.to_datetime``.
    """
    # Parse once instead of re-parsing the column for every feature.
    dates = pd.to_datetime(df[col]).dt

    df['Year'] = dates.year
    df['Month'] = dates.month
    df['Day'] = dates.day
    df['Dayofweek'] = dates.dayofweek
    df['DayOfyear'] = dates.dayofyear
    # ``Series.dt.week`` was deprecated in pandas 1.1 and removed in 2.0;
    # ``isocalendar().week`` yields the same ISO week number (as UInt32).
    df['Week'] = dates.isocalendar().week
    df['Quarter'] = dates.quarter
    df['Is_month_start'] = dates.is_month_start
    df['Is_month_end'] = dates.is_month_end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def rounding(num, prices=None):
    """Return the midpoint of the two neighbouring known prices around ``num``.

    The distinct values of ``prices`` are sorted; the largest value that is
    ``<= num`` and the next larger distinct value form a bracket, and the
    mean of the bracket is returned.

    Args:
        num: Value to snap to a bracket midpoint.
        prices: Optional array-like of reference prices. When omitted, the
            module-level ``train['price']`` is used, as in the original code.

    Returns:
        ``0.0`` when ``num`` is below every known price (or there are none),
        the largest known price when ``num`` is at or above it, and otherwise
        the mean of the bracketing pair.
    """
    if prices is None:
        prices = train['price']

    # np.unique already returns its result sorted ascending.
    uniques = np.unique(prices)

    # Number of distinct prices that are <= num.
    idx = int(np.searchsorted(uniques, num, side='right'))
    if idx == 0:
        return 0.0

    lower = uniques[idx - 1]
    # At or above the maximum there is no upper neighbour; the original code
    # indexed one past the end here and raised IndexError.
    upper = uniques[idx] if idx < len(uniques) else lower
    return (lower + upper) / 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# dataset.py | |
import torch | |
import numpy as np | |
from PIL import Image | |
from PIL import ImageFile | |
from torch.utils.data import DataLoader, Dataset | |
# sometimes, you will have images without an ending bit
# this takes care of those kinds of (corrupt) images
ImageFile.LOAD_TRUNCATED_IMAGES = True | |
class Model(nn.Module): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# config.py | |
# we define all the configuration here | |
MAX_LEN = 128 | |
TRAIN_BATCH_SIZE = 16 | |
VALID_BATCH_SIZE = 8 | |
EPOCHS = 10 | |
# EMB CNN LSTM | |
import torch | |
import torch.nn.functional as F |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# config.py | |
import transformers | |
# this is the maximum number of tokens in the sentence | |
MAX_LEN = 512 | |
# batch sizes are small because the model is huge!
TRAIN_BATCH_SIZE = 8 | |
VALID_BATCH_SIZE = 4 | |
# let's train for a maximum of 10 epochs | |
EPOCHS = 10 | |
# define path to BERT model files |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class AttentionModel(nn.Module): | |
def __init__(self, num_classes = 5, | |
embed_size = 2560, LSTM_UNITS = 64, pretrained = True, BATCH_SIZE = 4): | |
super().__init__() | |
self.batch_size = BATCH_SIZE | |
self.cnn = timm.create_model('efficientnet_b7', pretrained=pretrained) | |
self.avgpool = torch.nn.AdaptiveAvgPool2d(1) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Model(nn.Module): | |
def __init__(self): | |
super().__init__() | |
backbone = timm.create_model(TIMM_MODEL, pretrained=True) | |
n_features = backbone.head.in_features | |
self.backbone = nn.Sequential(*backbone.children())[:-2] | |
self.classifier = nn.Linear(n_features, 5) | |
self.pool = nn.AdaptiveAvgPool2d((1, 1)) | |
def forward_features(self, x): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from PIL import Image | |
import pandas as pd | |
from tqdm.auto import tqdm | |
import numpy as np | |
import pydicom | |
from pydicom.pixel_data_handlers.util import apply_voi_lut |
OlderNewer