Skip to content

Instantly share code, notes, and snippets.

@MSWon
Created September 20, 2019 08:42
Show Gist options
  • Save MSWon/cce57027fff3275846e728ae5dfebf35 to your computer and use it in GitHub Desktop.
Save MSWon/cce57027fff3275846e728ae5dfebf35 to your computer and use it in GitHub Desktop.
shuffling word's character order
import random
import numpy as np
class shuffle():
    """Garble words by shuffling their interior characters, reproducibly.

    A word keeps a fixed-size prefix and suffix and has only the characters
    in between permuted.  Two random sources are involved: NumPy's global
    generator picks *which* tokens of a sentence get garbled, and a
    per-instance ``random.Random`` permutes the characters.  Both are seeded
    with the same constant so repeated runs give identical output.
    """

    # Seed used for both random sources.
    _SEED = 1234

    def __init__(self):
        np.random.seed(self._SEED)
        # ``np.random.seed`` does not affect the stdlib ``random`` module,
        # so character shuffling needs its own seeded generator.  Owning a
        # private instance avoids clobbering the global ``random`` state.
        self._rng = random.Random(self._SEED)

    def shuffle_string(self, string):
        """Return ``string`` with its characters in a random order."""
        chars = list(string)
        self._rng.shuffle(chars)
        return ''.join(chars)

    def garble_word(self, word):
        """Return ``word`` with its middle characters shuffled.

        The first and last ``len(word) // 4 + 1`` characters stay in place;
        only the characters between them are permuted.  Words of three
        characters or fewer are returned unchanged, and words of four or
        five characters are effectively unchanged because the middle part
        holds at most one character.
        """
        # No operation needed on sufficiently small words.
        if len(word) <= 3:
            return word
        # Number of characters preserved at each end of the word.
        keep = (len(word) // 4) + 1
        first, mid, last = word[:keep], word[keep:-keep], word[-keep:]
        return first + self.shuffle_string(mid) + last

    def shuffle_corpus(self, corpus, vocabulary, shuffle_rate=2):
        """Garble a random subset of in-vocabulary tokens, in place.

        Args:
            corpus: iterable of mutable token sequences (e.g. lists of
                strings).  Selected tokens are overwritten in place.
            vocabulary: container supporting ``in``; only tokens found in
                it are candidates for garbling.
            shuffle_rate: positive integer; ``len(candidates) //
                shuffle_rate`` candidates per sentence are garbled.

        Returns:
            None.  ``corpus`` is modified in place.
        """
        # Re-seed both sources so every call is reproducible on its own.
        np.random.seed(self._SEED)
        self._rng = random.Random(self._SEED)
        for sent in corpus:
            index = [i for i, token in enumerate(sent) if token in vocabulary]
            # Only the empty case is skipped: older NumPy releases reject an
            # empty population.  A non-empty population with a sample size
            # of zero must still go through ``choice`` because that call
            # advances NumPy's generator and affects later sentences.
            if not index:
                continue
            r_list = np.random.choice(
                index, len(index) // shuffle_rate, replace=False
            )
            for j in r_list:
                sent[j] = self.garble_word(sent[j])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment