Sentence-level scenario detector (CNN) #CNN #NLP
import os
import numpy as np
import pandas as pd
from bert_serving.client import BertClient
from keras.layers import Conv1D
from keras.layers import Dense, Activation, Dropout, Flatten, AveragePooling1D
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.utils import np_utils
np.random.seed(1337)
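# Note: this assumes a bert-serving-server instance is already running locally;
# with the default BERT-base model, each sentence is encoded to a 768-dimensional vector.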
bc = BertClient(check_version=False)
sentence_path = os.path.join("/Users/elfsong/PycharmProjects/BERT_demo", "sentence_class.xls")
data_frame = pd.read_excel(sentence_path, sheet_name='sheet1')
train_data_list = list()
train_label_list = list(data_frame["type"])
for sentence in data_frame["sentence"]:
    print(sentence)
    result = bc.encode([sentence])
    train_data_list += [result[0]]
# Data preprocessing
X_train = np.array(train_data_list)
X_train = np.expand_dims(X_train, 2)
y_train = np_utils.to_categorical(train_label_list, num_classes=11)
print(X_train.shape)
print(y_train.shape)
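# Expected shapes: X_train is (num_sentences, 768, 1) and y_train is (num_sentences, 11).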
# Build the model
model = Sequential([
    Conv1D(filters=5, kernel_size=5, strides=1, padding='valid', input_shape=(768, 1), name="Convolution_Layer_1"),
    AveragePooling1D(pool_size=5, strides=1, padding="valid", name="Pooling_Layer_1"),
    Conv1D(filters=5, kernel_size=5, strides=1, padding='valid', name="Convolution_Layer_2"),
    AveragePooling1D(pool_size=5, strides=1, padding="valid", name="Pooling_Layer_2"),
    Flatten(name="Flatten_Layer"),
    Dense(256, input_dim=3760, name="Dense_Layer_1"),
    Activation('relu'),
    Dropout(0.1),
    Dense(32, input_dim=256, name="Dense_Layer_2"),
    Activation('relu'),
    Dropout(0.1),
    Dense(11, input_dim=32, name="Dense_Layer_3"),
    Activation('softmax'),
])
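# With 'valid' padding the feature length shrinks 768 -> 764 -> 760 -> 756 -> 752,
# so Flatten outputs 5 * 752 = 3760 features, matching Dense_Layer_1's input_dim.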
# Configure the RMSprop optimizer
rmsprop = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
model.compile(optimizer=rmsprop,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(X_train, y_train, epochs=120, batch_size=8)
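# Map a predicted probability vector back to a scenario name. The order of
# category_list is assumed to match the integer codes in the "type" column (0-10).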
def get_result(result):
    max_index = np.argmax(result)
    category_list = ["None", "city", "forest", "default", "flatland", "river", "college", "town", "mountain", "ocean",
                     "plaza"]
    return category_list[max_index]
sentence_vector = bc.encode(["猫爷爷带着[小鸭子]来到了动物游乐场。"])  # "Grandpa Cat brings [the little duckling] to the animal playground."
sentence_vector = np.expand_dims(sentence_vector, 2)
result = model.predict(sentence_vector)
print(get_result(result))