Skip to content

Instantly share code, notes, and snippets.

View arshjat's full-sized avatar
🏠
Working from home

Arsh Panghal arshjat

🏠
Working from home
View GitHub Profile
# Convert the model's predicted scores into hard 0/1 labels and save them.
y_pred = model2.predict(X_test)

# Scores at or below 0.5 become class 0; everything else becomes class 1.
# The `<=` form is kept so a NaN score still falls into class 1, exactly as
# the original if/else did.
# NOTE(review): presumably y_pred is (n,) or (n, 1); np.ravel flattens either
# into one score per sample -- confirm against the model's output layer.
y = np.where(np.ravel(y_pred) <= 0.5, 0, 1).tolist()

# Written with savetxt's default number format, one label per line.
np.savetxt("foo.csv", y, delimiter="\n")
# Build the network with the hyper-parameters chosen for this run.
model = build_model1(
    lr=1e-3,
    lr_d=1e-10,
    units=128,
    spatial_dr=0.5,
    kernel_size1=4,
    kernel_size2=4,
    dense_units=64,
    dr=0.2,
    conv_size=32,
)
def build_model1(
    lr=0.0,
    lr_d=0.0,
    units=0,
    spatial_dr=0.0,
    kernel_size1=3,
    kernel_size2=2,
    dense_units=128,
    dr=0.1,
    conv_size=32,
):
    """Set up the training callbacks and the frozen-embedding input stage.

    Reads the module-level names ``max_len``, ``embed_size`` and
    ``embedding_matrix``.

    Args:
        spatial_dr: Rate passed to ``SpatialDropout1D`` on the embedded
            sequence.
        lr, lr_d, units, kernel_size1, kernel_size2, dense_units, dr,
            conv_size: Not used by the statements visible here.

    NOTE(review): only the opening of this function is visible in this
    excerpt -- no model is assembled or returned yet, although the caller
    assigns the result. The remaining layers, compile and fit steps are
    presumably defined in the part that is not shown; confirm against the
    full source.
    """
    file_path = "best_model.hdf5"
    # Persist only the weights with the lowest validation loss seen so far.
    check_point = ModelCheckpoint(file_path, monitor="val_loss", verbose=1,
                                  save_best_only=True, mode="min")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=3)

    inp = Input(shape=(max_len,))
    # Take the vocabulary size from the matrix itself rather than a literal
    # 30001: the supplied weights must have exactly `input_dim` rows, and the
    # matrix can be smaller when the tokenizer saw fewer words than the cap.
    x = Embedding(embedding_matrix.shape[0], embed_size,
                  weights=[embedding_matrix], trainable=False)(inp)
    x1 = SpatialDropout1D(spatial_dr)(x)
# Build the embedding weight matrix: row i holds the pretrained vector of the
# word whose tokenizer index is i.
word_index = tk.word_index
nb_words = min(max_features, len(word_index))
# One row more than nb_words so that index nb_words itself is addressable.
embedding_matrix = np.zeros((nb_words + 1, embed_size))
for word, i in word_index.items():
    # Words whose index reaches the vocabulary cap get no row filled in.
    if i >= max_features:
        continue
    embedding_vector = embedding_index.get(word)
    # Words absent from the pretrained table keep their all-zero row.
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector
# File that the embedding loader opens and reads line by line.
# NOTE(review): the value looks like a placeholder -- replace it with the
# real path of the pretrained word-vector file before running.
embedding_path = "path-to-word-embedding"
# Used as the column count of the embedding matrix and as the Embedding
# layer's output size, so it has to be an integer at run time.
# NOTE(review): the string below looks like a placeholder for that integer.
embed_size = "dimension of embedding vectors"
# Cap applied to the vocabulary size when the embedding matrix is built.
max_features = 30000
def get_coefs(word, *arr):
    """Pair a token with its vector components.

    Args:
        word: The token, returned unchanged.
        *arr: The vector components; converted to a float32 array.

    Returns:
        A ``(word, vector)`` tuple, where ``vector`` is a float32 NumPy
        array built from ``arr``.
    """
    vector = np.asarray(arr, dtype="float32")
    return word, vector
# Load the pretrained vectors into a {word: float32 vector} lookup table.
# Each line is split on single spaces: the first field is the word, the rest
# are its vector components. The `with` block guarantees the file handle is
# closed once the table has been built.
# NOTE(review): UTF-8 is assumed, as is usual for published word-vector text
# files -- confirm for the file actually used.
with open(embedding_path, encoding="utf-8") as embedding_file:
    embedding_index = dict(
        get_coefs(*line.strip().split(" ")) for line in embedding_file
    )
# Length every tokenized tweet is padded or truncated to.
max_len = 50

# Lower-casing tokenizer with character filtering disabled, fitted on the
# combined train + test corpus.
# NOTE(review): no `num_words` is given, so the produced sequences are not
# capped to any vocabulary size here -- confirm that matches the embedding
# layer's input size.
tk = Tokenizer(lower=True, filters="")
tk.fit_on_texts(full_text)

# Map each tweet to its sequence of word indices.
train_tokenized = tk.texts_to_sequences(train["tweet"])
test_tokenized = tk.texts_to_sequences(test["tweet"])

# Bring every sequence to the common length expected by the model input.
X_train = pad_sequences(train_tokenized, maxlen=max_len)
X_test = pad_sequences(test_tokenized, maxlen=max_len)

# Training targets.
y = train["sentiment"]
@arshjat
arshjat / 1.5.py
Last active January 1, 2019 10:41
# Combined corpus (train tweets followed by test tweets).
full_text = list(train['tweet'].values) + list(test['tweet'].values)

# Build the exclusion set once; the original re-evaluated
# stopwords.words('english') and scanned it linearly for every single tweet.
excluded_entries = set(stopwords.words('english'))
excluded_entries.update(['.',',','/','@','"','&amp','<br />','+/-','zzzzzzzzzzzzzzzzz',':-D',':D',':P',':)','!',';'])

# NOTE(review): the membership test compares each *whole tweet* against the
# exclusion set, so only tweets that are exactly one of these entries are
# dropped. Word-level filtering may have been intended -- confirm before
# changing. The test runs before lower-casing, as in the original.
full_text = [tweet.lower() for tweet in full_text if tweet not in excluded_entries]
def _strip_mentions(text):
    """Return *text* without the whitespace-separated tokens starting with '@'.

    The remaining tokens are re-joined with single spaces.
    """
    kept = [token for token in text.split() if not token.startswith('@')]
    return ' '.join(kept)


# Remove @-prefixed tokens from every tweet in both data sets.
train['tweet'] = train['tweet'].apply(_strip_mentions)
test['tweet'] = test['tweet'].apply(_strip_mentions)
# Both input files are semicolon-separated.
train = pd.read_csv(
    "../input/question2/train.csv",
    sep=';',
)
# The test file is parsed with quote handling disabled.
test = pd.read_csv(
    "../input/question2/test.csv",
    sep=';',
    quoting=csv.QUOTE_NONE,
)
# Discard the 'Unnamed: 1' column from the test frame.
test.drop(columns=['Unnamed: 1'], inplace=True)