Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save suriyadeepan/7bf16d7dae6c92004495eaef44618169 to your computer and use it in GitHub Desktop.
Save suriyadeepan/7bf16d7dae6c92004495eaef44618169 to your computer and use it in GitHub Desktop.
prep.py
def splitStringList(X):
    """Tokenise every string in *X* into runs of lowercase ASCII letters.

    Each string is broken at every character outside ``a-z``; empty
    fragments are discarded.  Uppercase letters, digits, whitespace and
    punctuation all act as separators, so callers that want to keep
    capitalised words must lower-case the text beforehand.

    Args:
        X: Iterable of strings.

    Returns:
        A list with one entry per input string, each entry being the list
        of lowercase-letter tokens found in that string (possibly empty).
    """
    # Collecting maximal runs of [a-z] yields the same tokens as splitting
    # on [^a-z]+ and dropping the empty pieces.
    return [re.findall(r'[a-z]+', x) for x in X]
def cleanString(x):
    """Tokenise a single string into runs of lowercase ASCII letters.

    The string is broken at every character outside ``a-z``; empty
    fragments are discarded.  Uppercase letters, digits, whitespace and
    punctuation all act as separators, so callers that want to keep
    capitalised words must lower-case the text beforehand.

    Args:
        x: The string to tokenise.

    Returns:
        The list of lowercase-letter tokens in order of appearance; an
        empty list when the string contains no lowercase letters.
    """
    # Collecting maximal runs of [a-z] yields the same tokens as splitting
    # on [^a-z]+ and dropping the empty pieces.
    return re.findall(r'[a-z]+', x)
def vectorize_stories(inX, inXS, inXQ, inY, word2idx, aword2idx, sq_maxlen, story_maxlen, query_maxlen):
    """Map tokenised stories, queries and answers to padded index arrays.

    The four input collections are walked in lockstep with ``zip``, so
    iteration stops at the shortest of them.

    Args:
        inX: Iterable of token sequences looked up in ``word2idx`` for the
            first output (named ``story_query`` in the loop; presumably the
            story and query combined -- confirm against the caller).
        inXS: Iterable of story token sequences.
        inXQ: Iterable of query token sequences.
        inY: Iterable of answers, each used as a key into ``aword2idx``.
        word2idx: Mapping from token to index for stories and queries.
        aword2idx: Mapping from answer to index.
        sq_maxlen: ``maxlen`` passed to ``pad_sequences`` for the first output.
        story_maxlen: ``maxlen`` passed to ``pad_sequences`` for the stories.
        query_maxlen: ``maxlen`` passed to ``pad_sequences`` for the queries.

    Returns:
        A 4-tuple ``(X, Xs, Xq, Y)``: the three padded index sequences as
        returned by ``pad_sequences`` and the answer indices as a NumPy array.

    Raises:
        KeyError: If a token or answer is missing from its mapping (when
            the mappings are plain dicts).
    """
    X = []
    Xs = []
    Xq = []
    Y = []
    for story_query, story, query, answer in zip(inX, inXS, inXQ, inY):
        X.append([word2idx[w] for w in story_query])
        # Bug fix: the story-only indices must go here. Previously the
        # combined story+query sequence was appended instead, so the
        # story input merely duplicated the first output.
        Xs.append([word2idx[w] for w in story])
        Xq.append([word2idx[w] for w in query])
        Y.append(aword2idx[answer])
    # pad_sequences is supplied by the surrounding module (presumably the
    # Keras preprocessing helper -- confirm the import).
    return (
        pad_sequences(X, maxlen=sq_maxlen),
        pad_sequences(Xs, maxlen=story_maxlen),
        pad_sequences(Xq, maxlen=query_maxlen),
        np.array(Y),
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment