This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Score the padded test sequences with the trained model.
y_pred = model2.predict(X_test)

# Turn each score into a hard label: scores <= 0.5 become 0, anything above
# becomes 1.
# NOTE(review): assumes the model emits one score per sample - confirm.
y = [0 if score <= 0.5 else 1 for score in y_pred]

# Write one integer label per line. Without an explicit integer format,
# np.savetxt falls back to '%.18e' and would emit 0/1 in scientific notation.
np.savetxt("foo.csv", y, fmt="%d")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build the network with the hyper-parameters chosen for this run.
model = build_model1(
    lr=1e-3,
    lr_d=1e-10,
    units=128,
    spatial_dr=0.5,
    kernel_size1=4,
    kernel_size2=4,
    dense_units=64,
    dr=0.2,
    conv_size=32,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def build_model1(lr=0.0, lr_d=0.0, units=0, spatial_dr=0.0, kernel_size1=3, kernel_size2=2, dense_units=128, dr=0.1, conv_size=32):
    """Set up training callbacks and the input/embedding front end of the model.

    Reads the module-level ``max_len``, ``embed_size`` and ``embedding_matrix``.

    Args:
        spatial_dr: Rate passed to ``SpatialDropout1D`` on the embedded input.
        lr, lr_d, units, kernel_size1, kernel_size2, dense_units, dr,
        conv_size: Accepted but not referenced in the portion of the
            function visible here.

    NOTE(review): the visible definition ends right after the dropout layer;
    the remaining layers, compilation/fitting and the ``return`` appear to be
    missing from this copy - confirm against the complete source.
    """
    # Keep only the weights with the lowest validation loss on disk.
    file_path = "best_model.hdf5"
    check_point = ModelCheckpoint(file_path, monitor="val_loss", verbose=1,
                                  save_best_only=True, mode="min")
    # Stop once validation loss has not improved for 3 consecutive epochs.
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=3)

    inp = Input(shape=(max_len,))
    # Size the embedding table from the pretrained matrix itself rather than a
    # hard-coded row count, so the frozen weights always match the layer even
    # when the vocabulary is smaller than the configured cap.
    x = Embedding(embedding_matrix.shape[0], embed_size,
                  weights=[embedding_matrix], trainable=False)(inp)
    x1 = SpatialDropout1D(spatial_dr)(x)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Map every tokenizer index to its pretrained vector; rows for words without a
# pretrained vector (and row 0) stay all-zero.
word_index = tk.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.zeros((nb_words + 1, embed_size))
for word, i in word_index.items():
    # Only indices below the vocabulary cap are given a row.
    if i < max_features:
        embedding_vector = embedding_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Vocabulary cap used when building the embedding matrix.
max_features = 30000

# The two values below are placeholders and must be replaced before running:
# the location of the pretrained embedding file ...
embedding_path = "path-to-word-embedding"
# ... and the dimensionality of its vectors.
embed_size = "dimension of embedding vectors"
def get_coefs(word, *arr):
    """Pair *word* with its remaining fields converted to a float32 array.

    Args:
        word: First field of an embedding line (the token).
        *arr: Remaining fields, converted element-wise to ``float32``.

    Returns:
        A ``(word, vector)`` tuple where ``vector`` is a 1-D NumPy array.
    """
    vector = np.asarray(arr, dtype='float32')
    return word, vector
# Parse the embedding file into {token: float32 vector}. Each line is split on
# single spaces: the first field is the token, the rest are its coefficients.
# The context manager guarantees the file handle is closed once parsing ends.
# NOTE(review): UTF-8 is assumed for the embedding file - confirm for the
# file actually used.
with open(embedding_path, encoding="utf-8") as embedding_file:
    embedding_index = dict(
        get_coefs(*line.strip().split(" ")) for line in embedding_file
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fixed sequence length fed to the network.
max_len = 50

# Fit the vocabulary on the combined corpus; an empty filter string means no
# characters are stripped by the tokenizer, and text is lower-cased.
tk = Tokenizer(filters='', lower=True)
tk.fit_on_texts(full_text)

# Convert each tweet to a sequence of vocabulary indices ...
train_tokenized, test_tokenized = (
    tk.texts_to_sequences(frame['tweet']) for frame in (train, test)
)
# ... and pad/truncate every sequence to exactly max_len entries.
X_train, X_test = (
    pad_sequences(sequences, maxlen=max_len)
    for sequences in (train_tokenized, test_tokenized)
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Training targets: the 'sentiment' column of the training frame.
y = train['sentiment']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Combined train + test tweets, used as the corpus for fitting the tokenizer.
full_text = list(train['tweet'].values) + list(test['tweet'].values)

# Build the exclusion set once instead of reloading and linearly scanning the
# stopword list for every tweet.
_excluded_texts = set(stopwords.words('english')) | {
    '.', ',', '/', '@', '"', '&', '<br />', '+/-', 'zzzzzzzzzzzzzzzzz',
    ':-D', ':D', ':P', ':)', '!', ';',
}

# Drop entries that are exactly an excluded string (checked before
# lower-casing, so the match is case-sensitive), then lower-case the rest.
# NOTE(review): each entry is a whole tweet, so only tweets consisting solely
# of one stopword/token are removed - confirm whether word-level filtering was
# intended.
full_text = [text.lower() for text in full_text if text not in _excluded_texts]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _strip_mentions(text):
    """Return *text* without whitespace-separated tokens starting with '@'.

    The remaining tokens are re-joined with single spaces.
    """
    kept = [token for token in text.split() if not token.startswith('@')]
    return ' '.join(kept)


# Apply the same clean-up to both data sets.
train['tweet'] = train['tweet'].apply(_strip_mentions)
test['tweet'] = test['tweet'].apply(_strip_mentions)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Both files are semicolon-separated.
train = pd.read_csv("../input/question2/train.csv", sep=';')

# The test file is parsed with quoting disabled, so quote characters are kept
# as literal text. The 'Unnamed: 1' column is discarded.
# NOTE(review): presumably that column comes from an extra separator in the
# file - confirm.
test = pd.read_csv(
    "../input/question2/test.csv", sep=';', quoting=csv.QUOTE_NONE
).drop(columns=['Unnamed: 1'])
NewerOlder