Skip to content

Instantly share code, notes, and snippets.

@MajorTal
Created March 24, 2016 08:32
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save MajorTal/67d54887a729b5e5aa85 to your computer and use it in GitHub Desktop.
Save MajorTal/67d54887a729b5e5aa85 to your computer and use it in GitHub Desktop.
from numpy.random import choice as random_choice, randint as random_randint, rand
# Strings that have already reached this length are never grown by the insert step.
MAX_INPUT_LEN = 40
# Per-character noise rate. add_noise_to_string multiplies its rate by the string
# length, so at this rate a string of MAX_INPUT_LEN characters gets each kind of
# mistake with probability 0.2. (Presumably the value callers pass as
# ``amount_of_noise`` -- confirm against the call sites.)
AMOUNT_OF_NOISE = 0.2 / MAX_INPUT_LEN
# Alphabet of known characters. The final "." is excluded (via CHARS[:-1]) whenever
# a random character is drawn for a replacement or an insertion.
CHARS = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .")


def add_noise_to_string(a_string, amount_of_noise):
    """Add some artificial spelling mistakes to the string.

    Up to four kinds of mistake are applied, in this order: replace one
    character, delete one character, insert one character, transpose two
    adjacent characters.  Each kind is applied independently with probability
    ``amount_of_noise * len(a_string)``, where the length is re-read before
    every step (earlier steps may have changed it).

    Args:
        a_string: The text to corrupt.  An empty string is returned unchanged
            because every step then has probability zero.
        amount_of_noise: Per-character probability of each kind of mistake.

    Returns:
        The possibly modified text as a plain ``str``.
    """
    if rand() < amount_of_noise * len(a_string):
        # Replace a character with a random character.
        position = random_randint(len(a_string))
        # str() turns the NumPy string scalar returned by choice() into a plain str.
        replacement = str(random_choice(CHARS[:-1]))
        a_string = a_string[:position] + replacement + a_string[position + 1:]

    if rand() < amount_of_noise * len(a_string):
        # Delete a character.
        position = random_randint(len(a_string))
        a_string = a_string[:position] + a_string[position + 1:]

    if len(a_string) < MAX_INPUT_LEN and rand() < amount_of_noise * len(a_string):
        # Add a random character.
        # NOTE: randint(n) draws from [0, n), so the new character is always placed
        # before an existing one and never appended at the very end.
        position = random_randint(len(a_string))
        inserted = str(random_choice(CHARS[:-1]))
        a_string = a_string[:position] + inserted + a_string[position:]

    # rand() is drawn before the length check so the random stream is consumed
    # exactly as before.  A swap needs at least two characters; without the length
    # check, randint(0) raises ValueError for a one-character string.
    if rand() < amount_of_noise * len(a_string) and len(a_string) > 1:
        # Transpose 2 adjacent characters.
        position = random_randint(len(a_string) - 1)
        a_string = (
            a_string[:position]
            + a_string[position + 1]
            + a_string[position]
            + a_string[position + 2:]
        )

    return a_string
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment