Skip to content

Instantly share code, notes, and snippets.

@katorly
Last active April 19, 2023 14:57
Show Gist options
  • Save katorly/59030fb08df83af3aaa5a05fceab8946 to your computer and use it in GitHub Desktop.
Save katorly/59030fb08df83af3aaa5a05fceab8946 to your computer and use it in GitHub Desktop.
/**
* Copyright (c) 2023 Katorly (https://github.com/katorly)
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
*/
object textutils {
    /** Sample article scanned when [count] is called without an argument. */
    private val SAMPLE_TEXT = """
        你的文章文字
        放在这里
        啊啊啊啊啊
    """.trimIndent()

    /**
     * Symbols that are never counted and that also terminate a two-character word.
     *
     * Whitespace is not listed here; it is rejected separately via `isBlank()`.
     * The full-width punctuation is written with `\u` escapes so that the entries
     * survive tools that normalise them to their ASCII look-alikes.
     * Note that only lowercase ASCII letters are skipped; uppercase letters are counted.
     */
    private val SKIPPED: Set<String> =
        setOf(
            // ASCII punctuation
            ",", ".", "!", "?", "'", "\"", "(", ")", "\\", "/", ":", ";", "-",
            // CJK punctuation
            "。", "‘", "’", "“", "”", "《", "》",
            // Full-width comma, exclamation mark, question mark, colon, semicolon
            "\uFF0C", "\uFF01", "\uFF1F", "\uFF1A", "\uFF1B",
        ) + ('0'..'9').map { it.toString() } + ('a'..'z').map { it.toString() }

    /**
     * Prints how many times each one-character and two-character word appears in [text].
     *
     * Only words that appear more than once are printed, one per line as `word: count`,
     * in the order in which they were first encountered.
     *
     * @param text the article to scan; defaults to a small built-in sample.
     */
    @JvmOverloads
    fun count(text: String = SAMPLE_TEXT) {
        for ((word, occurrences) in countRepeated(text)) {
            println("$word: $occurrences")
        }
    }

    /**
     * Counts one-character and two-character words in [text].
     *
     * A "character" is a full Unicode code point, so characters outside the Basic
     * Multilingual Plane are treated as one symbol rather than two surrogate halves.
     * A two-character word is formed only from two counted symbols that are directly
     * adjacent on the same line: a line break, whitespace or any skipped symbol
     * between them breaks the pair.
     *
     * @param text the article to scan; `\n`, `\r\n` and `\r` line endings are accepted.
     * @return the words that occur more than once mapped to their number of
     *   occurrences, iterated in first-seen order.
     */
    fun countRepeated(text: String): Map<String, Int> {
        // LinkedHashMap keeps first-seen order so the report is deterministic.
        val tally = LinkedHashMap<String, Int>()
        fun bump(word: String) {
            tally[word] = (tally[word] ?: 0) + 1
        }

        for (line in text.lineSequence()) {
            // Reset per line so a pair never spans a line break.
            var previous: String? = null
            for (symbol in symbolsOf(line)) {
                if (symbol.isBlank() || symbol in SKIPPED) {
                    // A skipped symbol separates words; do not pair across it.
                    previous = null
                    continue
                }
                bump(symbol)
                previous?.let { bump(it + symbol) }
                previous = symbol
            }
        }
        return tally.filterValues { it > 1 }
    }

    /** Splits [line] into one string per Unicode code point, keeping surrogate pairs together. */
    private fun symbolsOf(line: String): List<String> {
        val symbols = ArrayList<String>(line.length)
        var index = 0
        while (index < line.length) {
            val isPair = line[index].isHighSurrogate() &&
                index + 1 < line.length &&
                line[index + 1].isLowSurrogate()
            val end = if (isPair) index + 2 else index + 1
            symbols.add(line.substring(index, end))
            index = end
        }
        return symbols
    }
}
@katorly
Copy link
Author

katorly commented Apr 19, 2023

Example Output:

文: 2
啊: 5
啊啊: 4

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment