Skip to content

Instantly share code, notes, and snippets.

@jheth
Last active January 3, 2018 17:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jheth/1e74039003c52cb46a16e9eb799846a4 to your computer and use it in GitHub Desktop.
Save jheth/1e74039003c52cb46a16e9eb799846a4 to your computer and use it in GitHub Desktop.
Go CleanText: replace or strip Unicode characters in a string
// cleanTextReplacements maps a single rune to the plain-text string that
// replaces it; an empty value deletes the rune. Keys are runes, not strings:
// a string key such as "\xa0" is the lone byte 0xA0, which never equals the
// UTF-8 encoding of U+00A0 and therefore would never match.
var cleanTextReplacements = map[rune]string{
	// ASCII punctuation.
	'|': "",
	'&': "and",

	// Latin-1 and Latin Extended characters.
	'\u00a0': " ", // no-break space
	'\u00a2': "",  // cent sign
	'\u00ae': "",  // registered sign
	'\u00bd': "",  // one half
	'\u00de': "",  // capital thorn
	'\u00e2': "",  // a with circumflex
	'\u00e9': "",  // e with acute
	'\u00fc': "u", // u with diaeresis
	'\u1ebd': "e", // e with tilde

	// General punctuation: invisible marks, dashes, quotes, primes.
	'\u200b': " ",
	'\u200e': " ",
	'\u2010': "-",
	'\u2013': "-",
	'\u2014': "-",
	'\u2018': "'",
	'\u2019': "'",
	'\u201a': ",",
	'\u201b': "'",
	'\u201c': "\"",
	'\u201d': "\"",
	'\u201e': "\"",
	'\u201f': "\"",
	'\u2022': "-",
	'\u2026': "...",
	'\u2028': "",
	'\u2032': "'",
	'\u2033': "\"",
	'\u2034': "\"",
	'\u2035': "'",
	'\u2036': "\"",
	'\u2037': "\"",
	'\u2038': ".",
	'\u2044': "/",

	// Symbols and dingbats.
	'\u2122': "",
	'\u2600': "",
	'\u263a': "",
	'\u26fa': "",
	'\u27a2': ">",
	'\ufe0f': "",

	// Emoji and emoji modifiers.
	'\U0001f334': "",
	'\U0001f3a3': "",
	'\U0001f3d6': "",
	'\U0001f3dd': "",
	'\U0001f3fd': "",
	'\U0001f44c': "",
	'\U0001f44d': "",
	'\U0001f525': "", // flame
	'\U0001f601': "",
	'\U0001f60a': "", // smiley
	'\U0001f642': "",
	'\U0001f690': "",
}

// CleanText flattens text onto one line, replaces or removes a fixed set of
// Unicode characters, and limits the result to maxLength bytes.
//
// Text shorter than 5 bytes yields "". Lines separated by "\n" are joined
// with single spaces until the joined text reaches maxLength bytes. If the
// cleaned text is still longer than maxLength it is cut and "..." is
// appended, so the result never exceeds maxLength bytes. The cut always falls
// on a rune boundary. When maxLength is too small to hold the ellipsis the
// text is cut without one, and a non-positive maxLength yields "".
func CleanText(text string, maxLength int) string {
	if len(text) < 5 {
		return ""
	}

	text = joinLinesUpTo(text, maxLength)

	// Single pass over the runes: each one is either substituted from the
	// table or copied through unchanged.
	var b strings.Builder
	b.Grow(len(text))
	for _, r := range text {
		if repl, ok := cleanTextReplacements[r]; ok {
			b.WriteString(repl)
			continue
		}
		b.WriteRune(r)
	}

	return truncateAtRuneBoundary(b.String(), maxLength)
}

// joinLinesUpTo joins the "\n"-separated lines of text with single spaces,
// appending further lines only while the result is shorter than maxLength.
func joinLinesUpTo(text string, maxLength int) string {
	if !strings.Contains(text, "\n") {
		return text
	}

	lines := strings.Split(text, "\n")
	var b strings.Builder
	b.WriteString(lines[0])
	for _, line := range lines[1:] {
		if b.Len() >= maxLength {
			break
		}
		b.WriteByte(' ')
		b.WriteString(line)
	}
	return b.String()
}

// truncateAtRuneBoundary shortens text to at most maxLength bytes, ending it
// with "..." when there is room, without splitting a multi-byte rune.
func truncateAtRuneBoundary(text string, maxLength int) string {
	const ellipsis = "..."

	if len(text) <= maxLength {
		return text
	}
	if maxLength <= 0 {
		return ""
	}

	cut, suffix := maxLength-len(ellipsis), ellipsis
	if cut < 0 {
		// No room for the ellipsis; fall back to a plain cut.
		cut, suffix = maxLength, ""
	}

	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes; step back
	// until the cut lands on the first byte of a rune.
	for cut > 0 && text[cut]&0xC0 == 0x80 {
		cut--
	}
	return text[:cut] + suffix
}
@jheth
Copy link
Author

jheth commented Jan 3, 2018

// TestCleanText runs utils.CleanText over a table of inputs and checks each
// result against the expected cleaned string.
func TestCleanText(t *testing.T) {
	type args struct {
		text      string
		maxLength int
	}
	tests := []struct {
		name string
		args args
		want string
	}{
		{"Replace Ampersand", args{"Jack \u0026 Jill", 150}, "Jack and Jill"},
		{"Replace Nothing", args{"I'm here for you", 150}, "I'm here for you"},
		{"Combine new lines", args{"This is the first line.\nThis is the second line.", 150}, "This is the first line. This is the second line."},
		{"Special tick", args{"19\u2032 International Signature model", 150}, "19' International Signature model"},
		// Emoji are written as escapes so the inputs cannot be silently lost
		// by an editor or tool that drops characters outside the BMP.
		{"Replace Thumbsup", args{"outdoors! \U0001f44d\U0001f3fd", 150}, "outdoors! "}, // Two unicode characters. Thumbs-up + Color patch
		{"Replace Flame", args{"outdoors! \U0001f525", 150}, "outdoors! "},
		{"Replace Smiley", args{"outdoors! \U0001f60a", 150}, "outdoors! "},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// %q makes invisible or unexpected characters visible in failures.
			if got := utils.CleanText(tt.args.text, tt.args.maxLength); got != tt.want {
				t.Errorf("CleanText() = %q, want %q", got, tt.want)
			}
		})
	}
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment