Skip to content

Instantly share code, notes, and snippets.

@shubham0204
Created May 1, 2019 03:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save shubham0204/e0b37faaac32448fa53a10ccbc9ff805 to your computer and use it in GitHub Desktop.
Save shubham0204/e0b37faaac32448fa53a10ccbc9ff805 to your computer and use it in GitHub Desktop.
/**
 * Converts [message] into a sequence of vocabulary indices.
 *
 * The message is split on single space characters. Pieces that consist only of
 * whitespace are skipped. Every remaining piece is looked up (as-is, untrimmed)
 * in `vocabData`; pieces without an entry are encoded as `0`.
 *
 * @param message the text to encode.
 * @return one index per non-blank piece, in the order the pieces appear.
 */
fun tokenize(message: String): IntArray {
    return message.split(" ")
        .filter { piece -> piece.isNotBlank() }
        // `!!` is evaluated only when there is something to look up, so a missing
        // vocabulary fails exactly where the lookup happens.
        .map { piece -> vocabData!![piece] ?: 0 }
        .toIntArray()
}
/**
 * Forces [sequence] to exactly `maxlen` elements.
 *
 * - Longer sequences are truncated to their first `maxlen` elements.
 * - Shorter sequences are extended with trailing zeros.
 * - A sequence that already has `maxlen` elements is returned as the same array instance.
 *
 * @param sequence the indices to pad or truncate.
 * @return an array of length `maxlen` (for non-negative `maxlen`).
 */
fun padSequence(sequence: IntArray): IntArray {
    // Fails fast with the same null assertion as before if maxlen has not been set.
    val maxlen = this.maxlen!!
    return when {
        // `until` excludes the upper bound; the inclusive `0..maxlen` range kept
        // maxlen + 1 elements and could index past the end of the array.
        sequence.size > maxlen -> sequence.sliceArray(0 until maxlen)
        // copyOf fills the newly added tail positions with zeros.
        sequence.size < maxlen -> sequence.copyOf(maxlen)
        else -> sequence
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment