Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
North Wales Tech JVM Languages tasks
import com.eclipsesource.json.Json
import com.eclipsesource.json.JsonObject
import com.eclipsesource.json.JsonValue
/**
 * Recursively collects the values of every property named [keyName] found inside [jsonObj].
 *
 * The search descends into nested objects and into arrays at any depth (including arrays
 * nested directly inside other arrays). When a property matches [keyName] its value is
 * collected as-is and is NOT searched further, even if it is itself an object or array.
 *
 * @param jsonObj the JSON object to search
 * @param keyName the exact (case-sensitive) property name to look for
 * @return the matching values in document order; empty if there are none
 */
fun getAllWithName(jsonObj : JsonObject, keyName : String) : ArrayList<JsonValue> {
    val matches = ArrayList<JsonValue>()

    // Walks one JSON value, appending matches to `matches`. Primitives fall through untouched.
    fun collect(value: JsonValue) {
        when {
            // Iterate the members themselves rather than names() + get(name): get(name) only
            // ever returns the last value for a duplicated name, hiding the earlier ones.
            value.isObject -> for (member in value.asObject()) {
                if (member.name == keyName) {
                    matches.add(member.value)
                } else {
                    collect(member.value)
                }
            }
            // Recurse into every item so that objects inside nested arrays are found too.
            value.isArray -> for (item in value.asArray()) {
                collect(item)
            }
        }
    }

    collect(jsonObj)
    return matches
}
/**
 * Downloads the revision content of a Wikipedia page and prints its longest word(s) and its
 * ten most frequent words (ignoring a small list of very common English words).
 *
 * @param args exactly one element: the title of the Wikipedia page to analyse
 * @throws IllegalArgumentException if the number of arguments is not exactly one
 */
fun main(args: Array<String>) {
    require(args.size == 1) { "Expected exactly one argument (the page title) but got ${args.size}" }

    // Encode the title so spaces, '&', '#', non-ASCII letters etc. cannot corrupt the query string.
    // NOTE(review): a caller that already percent-encodes the title would now be double-encoded.
    val pageTitle = java.net.URLEncoder.encode(args[0], "UTF-8")
    val apiUrl = "https://en.wikipedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=json&titles=$pageTitle"
    val jsonText = java.net.URL(apiUrl).readText()
    val jsonObj = Json.parse(jsonText).asObject()

    // Presumably the API response stores the page text under properties named "*" — confirm
    // against the MediaWiki API docs. Non-string values are skipped because asString() throws on them.
    val texts = getAllWithName(jsonObj, "*")
        .filter { it.isString }
        .map(JsonValue::asString)

    val wordsRegex = Regex("\\p{L}+") // All letters as defined by Unicode

    // Occurrence count per lower-cased word across all retrieved texts.
    val wordCounts = HashMap<String, Int>()
    for (text in texts) {
        for (match in wordsRegex.findAll(text)) {
            val word = match.value.toLowerCase()
            wordCounts[word] = (wordCounts[word] ?: 0) + 1
        }
    }

    val allWords = wordCounts.keys
    val lengthOfLongestWords = allWords.fold(0) { longest, word -> Math.max(longest, word.length) }
    val longestWords = allWords.filter { word -> word.length == lengthOfLongestWords }

    val wordsToExclude = setOf("the", "to", "of", "and", "a", "is", "that", "in", "be", "for")
    // Keys are already lower-case, so they can be compared with the exclusion set directly.
    val mostCommonWords = wordCounts.entries
        .filter { entry -> entry.key !in wordsToExclude }
        .sortedByDescending { entry -> entry.value }
        .take(10)
        .map { entry -> entry.key }

    println("Longest word(s): ${longestWords.joinToString(", ")}")
    println("Most common words: ${mostCommonWords.joinToString(", ")}")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment