Muhammad4hmed / Basic-Pre-Processing-NLP.py
Last active December 21, 2020 09:00
Basic Pre Processing NLP
import re, string, unicodedata
import nltk
import contractions
import inflect
from nltk import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import LancasterStemmer, WordNetLemmatizer
def clean_text(s):
    """
    Clean a raw text string: collapse whitespace and strip punctuation.
    (The gist preview truncates the body; this is a minimal completion.)
    """
    # collapse runs of whitespace ("hi.  how are you" -> "hi. how are you")
    s = " ".join(s.split())
    # remove all punctuation using the string module's punctuation set
    s = re.sub(f"[{re.escape(string.punctuation)}]", "", s)
    return s
from sklearn.feature_selection import chi2
from sklearn.feature_selection import f_classif
from sklearn.feature_selection import f_regression
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_selection import mutual_info_regression
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import SelectPercentile
class UnivariateFeatureSelection:
    def __init__(self, n_features, problem_type, scoring):
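        # The gist preview truncates after the signature. A hedged sketch of
        # the likely body: pick the sklearn scoring callable valid for the
        # problem type, then SelectKBest (int n_features) or SelectPercentile
        # (float n_features in (0, 1]).
        if problem_type == "classification":
            valid_scoring = {"chi2": chi2, "f_classif": f_classif,
                             "mutual_info_classif": mutual_info_classif}
        else:
            valid_scoring = {"f_regression": f_regression,
                             "mutual_info_regression": mutual_info_regression}
        if scoring not in valid_scoring:
            raise ValueError("Invalid scoring function")
        if isinstance(n_features, int):
            self.selection = SelectKBest(valid_scoring[scoring], k=n_features)
        else:
            self.selection = SelectPercentile(valid_scoring[scoring],
                                              percentile=int(n_features * 100))

    # thin delegating wrappers so the object works like a sklearn transformer
    # (also an assumption about the truncated original)
    def fit(self, X, y):
        return self.selection.fit(X, y)

    def transform(self, X):
        return self.selection.transform(X)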
import pandas as pd

def create_date_features(df, col):
    # parse the column once instead of re-parsing it for every feature
    dates = pd.to_datetime(df[col])
    df['Year'] = dates.dt.year
    df['Month'] = dates.dt.month
    df['Day'] = dates.dt.day
    df['Dayofweek'] = dates.dt.dayofweek
    df['DayOfyear'] = dates.dt.dayofyear
    # Series.dt.week was removed in pandas 2.0; isocalendar() is the replacement
    df['Week'] = dates.dt.isocalendar().week.astype(int)
    df['Quarter'] = dates.dt.quarter
    df['Is_month_start'] = dates.dt.is_month_start
    df['Is_month_end'] = dates.dt.is_month_end
    return df
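# usage sketch with a hypothetical DataFrame (not from the original gist):
# df = pd.DataFrame({"sale_date": ["2020-12-21", "2021-03-01"]})
# df = create_date_features(df, "sale_date")
# df[["Year", "Month", "Week", "Quarter"]]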
import numpy as np

def rounding(num):
    # snap `num` to the midpoint of the two unique training prices around it
    # (relies on a global `train` DataFrame with a 'price' column)
    round_num, round_num2 = 0, 0
    uniques = np.unique(train['price'])  # np.unique already returns sorted values
    for i, n in enumerate(uniques):
        if n > num:
            break
        round_num = n
        # guard the lookahead: the largest unique value has no successor
        round_num2 = uniques[min(i + 1, len(uniques) - 1)]
    return (round_num + round_num2) / 2
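# worked example with hypothetical prices (not from the gist):
# train = pd.DataFrame({'price': [100, 200, 400]})
# rounding(250)  # -> 300.0, the midpoint of the bracketing prices 200 and 400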
# dataset.py
import torch
import numpy as np
from PIL import Image
from PIL import ImageFile
from torch.utils.data import DataLoader, Dataset
# sometimes you will have images without an ending bit;
# this takes care of those kinds of (corrupt) images
ImageFile.LOAD_TRUNCATED_IMAGES = True
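# The preview ends at the imports. A minimal sketch of the Dataset that
# likely follows; the field names (image_paths, targets) are assumptions.
class ClassificationDataset(Dataset):
    def __init__(self, image_paths, targets):
        self.image_paths = image_paths
        self.targets = targets

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, item):
        # force RGB so every sample has three channels
        image = np.array(Image.open(self.image_paths[item]).convert("RGB"))
        # HWC uint8 -> CHW float tensor, the layout torch vision models expect
        image = torch.tensor(image, dtype=torch.float).permute(2, 0, 1)
        return image, torch.tensor(self.targets[item], dtype=torch.long)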
import torch.nn as nn

class Model(nn.Module):
    ...  # the gist preview is truncated right after the class line
# config.py
# we define all the configuration here
MAX_LEN = 128
TRAIN_BATCH_SIZE = 16
VALID_BATCH_SIZE = 8
EPOCHS = 10
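# usage sketch (hypothetical train.py; names are assumptions):
# import config
# loader = DataLoader(train_dataset, batch_size=config.TRAIN_BATCH_SIZE)
# for epoch in range(config.EPOCHS): ...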
# EMB CNN LSTM
import torch
import torch.nn.functional as F
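import torch.nn as nn

# A sketch of the embedding -> CNN -> LSTM stack the header names;
# all sizes below are illustrative assumptions, not from the gist.
class EmbCnnLstm(nn.Module):
    def __init__(self, vocab_size=30000, embed_dim=128,
                 conv_channels=64, lstm_units=64):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # 1D convolution over the time axis of the embedded sequence
        self.conv = nn.Conv1d(embed_dim, conv_channels, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(conv_channels, lstm_units,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Linear(lstm_units * 2, 1)

    def forward(self, x):
        x = self.embedding(x)                       # (batch, seq, embed)
        x = F.relu(self.conv(x.permute(0, 2, 1)))   # (batch, channels, seq)
        out, _ = self.lstm(x.permute(0, 2, 1))      # (batch, seq, 2*units)
        return self.fc(out.max(dim=1).values)       # max-pool over time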
# config.py
import transformers
# this is the maximum number of tokens in the sentence
MAX_LEN = 512
# batch sizes is small because model is huge!
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 4
# let's train for a maximum of 10 epochs
EPOCHS = 10
# define path to BERT model files
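# The value is cut off in the preview; a hypothetical completion
# (path and tokenizer settings are assumptions, not the original gist):
# BERT_PATH = "/path/to/bert-base-uncased"
# TOKENIZER = transformers.BertTokenizer.from_pretrained(
#     BERT_PATH, do_lower_case=True
# )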
import timm
import torch
import torch.nn as nn

class AttentionModel(nn.Module):
    def __init__(self, num_classes=5, embed_size=2560,
                 LSTM_UNITS=64, pretrained=True, BATCH_SIZE=4):
        super().__init__()
        self.batch_size = BATCH_SIZE
        # efficientnet_b7's final feature map has 2560 channels,
        # which is where embed_size comes from
        self.cnn = timm.create_model('efficientnet_b7', pretrained=pretrained)
        self.avgpool = torch.nn.AdaptiveAvgPool2d(1)
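        # the preview stops above; a hedged guess at the continuation that the
        # class name and LSTM_UNITS imply (assumption, not the gist):
        self.lstm = nn.LSTM(embed_size, LSTM_UNITS,
                            bidirectional=True, batch_first=True)
        self.attention = nn.Linear(LSTM_UNITS * 2, 1)   # per-step scores
        self.logits = nn.Linear(LSTM_UNITS * 2, num_classes)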
import timm
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        backbone = timm.create_model(TIMM_MODEL, pretrained=True)  # TIMM_MODEL from config
        n_features = backbone.head.in_features
        # keep everything but the backbone's own pooling and head
        self.backbone = nn.Sequential(*backbone.children())[:-2]
        self.classifier = nn.Linear(n_features, 5)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

    def forward_features(self, x):
        # the preview truncates here; minimal completion: pooled, flattened
        # backbone features ready for the linear classifier
        x = self.backbone(x)
        return self.pool(x).flatten(1)
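    def forward(self, x):
        # classification head over the pooled features (not in the preview)
        return self.classifier(self.forward_features(x))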
import os
from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
import numpy as np
import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
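# The preview ends at the imports. A sketch of the DICOM-to-8-bit-image
# conversion they point to; the function name and scaling are assumptions.
def dicom_to_array(path, voi_lut=True):
    dicom = pydicom.dcmread(path)
    # apply the windowing (VOI LUT) stored in the file when requested
    data = apply_voi_lut(dicom.pixel_array, dicom) if voi_lut else dicom.pixel_array
    # MONOCHROME1 stores inverted intensities; flip so higher means brighter
    if dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    # rescale to the 0-255 uint8 range for saving with PIL
    data = data - np.min(data)
    data = (data / np.max(data) * 255).astype(np.uint8)
    return data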