A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
# https://www.kaggle.com/qqgeogor/keras-lstm-attention-glove840b-lb-0-043 | |
class Attention(Layer): | |
def __init__(self, step_dim, | |
W_regularizer=None, b_regularizer=None, | |
W_constraint=None, b_constraint=None, | |
bias=True, **kwargs): | |
self.supports_masking = True | |
self.init = initializers.get('glorot_uniform') | |
self.W_regularizer = regularizers.get(W_regularizer) |
from keras import Sequential | |
from keras.preprocessing.sequence import pad_sequences | |
from sklearn.model_selection import train_test_split | |
from keras.models import Sequential,Model | |
from keras.layers import LSTM, Dense, Bidirectional, Input,Dropout,BatchNormalization | |
from keras import backend as K | |
from keras.engine.topology import Layer | |
from keras import initializers, regularizers, constraints | |
# Definition of model |
from sklearn.model_selection import KFold | |
from sklearn.metrics import * | |
def kf_fit(model, x_train=X, y_train=y, test_data=test): | |
kf = KFold(n_splits=10, shuffle=True, random_state=42069) | |
preds = [] | |
# test_data = pad_sequences(test_data) | |
fold = 0 | |
aucs = 0 | |
for train_idx, val_idx in kf.split(x_train): |
import random | |
import os | |
from shutil import copyfile | |
# Placeholder locations: where entries are read from and where output goes.
ori_path = './your-original-path'
out_path = './your-output-path'
# List ori_path exactly once so that `pure_dirs` (bare entry names) and `dirs`
# (the same names joined onto ori_path) line up index-for-index when zipped.
# Two separate os.listdir() calls are not guaranteed to return the same order.
# Entries whose name starts with '.' are skipped.
pure_dirs = [adir for adir in os.listdir(ori_path) if not adir.startswith('.')]
dirs = [os.path.join(ori_path, adir) for adir in pure_dirs]
for adir, pure_adir in zip(dirs, pure_dirs): | |
print("==> Now processing:", adir) |
import numpy as np | |
import torch | |
from models import miracle_net | |
from config import Config | |
# Instantiate the configuration object and pass it to the network constructor.
opt = Config()
net = miracle_net.MiracleNet(opt)
import numpy as np | |
from torch.utils.data import DataLoader | |
from torch.utils.data import Dataset | |
class Template(Dataset): |
with open(file_path) as f: | |
for line in f: | |
j_content = json.loads(line) |
import collections

import numpy

# Tally how many times each value occurs in the array.
a = numpy.array([0, 3, 0, 1, 0, 1, 2, 1, 0, 0, 0, 0, 1, 3, 4])
collections.Counter(a)
# [Out]: Counter({0: 7, 1: 4, 3: 2, 2: 1, 4: 1})
from wordcloud import WordCloud, STOPWORDS | |
# Thanks : https://www.kaggle.com/aashita/word-clouds-of-various-shapes ## | |
def plot_wordcloud(text, mask=None, max_words=200, max_font_size=100, figure_size=(24.0,16.0), | |
title = None, title_size=40, image_color=False): | |
stopwords = set(STOPWORDS) | |
more_stopwords = {'one', 'br', 'Po', 'th', 'sayi', 'fo', 'Unknown'} | |
stopwords = stopwords.union(more_stopwords) | |
wordcloud = WordCloud(background_color='black', |
import os | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
def plot_history(history, save_path:str=None, figsize = (20, 9)): | |
f, axes = plt.subplots(1, 2, figsize=figsize) | |
sns.lineplot(range(1, history.epochs+1), history.history['acc'], label='Train Accuracy', ax=axes[0]) | |
sns.lineplot(range(1, history.epochs+1), history.history['val_acc'], label='Val Accuracy', ax=axes[0]) | |
sns.lineplot(range(1, history.epochs+1), history.history['loss'], label='Train Loss', ax=axes[1]) | |
sns.lineplot(range(1, history.epochs+1), history.history['val_loss'], label='Val Loss', ax=axes[1]) |
A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)