Skip to content

Instantly share code, notes, and snippets.

@statguy
Created February 23, 2018 10:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save statguy/8ca45b9c22c8ce207616dd6679eb0a8c to your computer and use it in GitHub Desktop.
Save statguy/8ca45b9c22c8ce207616dd6679eb0a8c to your computer and use it in GitHub Desktop.
# 1. Copy table from http://thai-language.com/ref/starred
# 2. Paste to Google Sheets
# 3. Save as TSV (tab-separated values), not CSV: the script reads a tab-separated .tsv file
# 4. Run through top_1000_thai_words.R
# 5. Upload to Anki App via https://api.ankiapp.com/nexus/
library(tidyverse)
library(stringr)
# Work from the downloads folder: the exported sheet is read from here, and
# the relative output paths used later in the script resolve here as well.
setwd("~/Downloads")

# Tab-separated export without a header row, so columns get the default names
# V1, V2, V3. Quote handling is switched off so that quote characters inside
# cells are kept as ordinary text.
words <- read.delim(
  "Top 1000 Thai words - Sheet1.tsv",
  header = FALSE,
  quote = "",
  stringsAsFactors = FALSE
)
# Convert one transliterated syllable that ends in a tone letter into its
# accented form.
#
# The last character of `x` is read as the tone marker:
#   "H" -> acute, "L" -> grave, "F" -> circumflex, "R" -> caron
#   (presumably high / low / falling / rising tone), applied to every
#   a, e, i, o, u in the rest of the syllable;
#   "M" -> the marker is dropped and the syllable is left unaccented.
#
# Args:
#   x: a single character string, e.g. "maiF".
#
# Returns:
#   A single character string. Missing values, empty strings and syllables
#   whose last character is not one of H/L/F/R/M are returned unchanged, so
#   no character is lost from input that carries no tone marker.
set_tone_word <- function(x) {
  # Nothing to decode for a missing or empty syllable.
  if (is.na(x) || !nzchar(x)) {
    return(x)
  }

  len <- nchar(x)
  tone <- substr(x, len, len)
  stem <- substr(x, 1L, len - 1L)

  # Replacement vowels in the order a, e, i, o, u. They are written as \u
  # escapes so the result does not depend on the encoding this file is read
  # with. The caron set uses U+01D2 (o with caron) so that all five vowels
  # carry the same diacritic.
  accented <- switch(tone,
    H = "\u00e1\u00e9\u00ed\u00f3\u00fa", # acute
    L = "\u00e0\u00e8\u00ec\u00f2\u00f9", # grave
    F = "\u00e2\u00ea\u00ee\u00f4\u00fb", # circumflex
    R = "\u01ce\u011b\u01d0\u01d2\u01d4", # caron
    NULL
  )

  if (!is.null(accented)) {
    chartr("aeiou", accented, stem)
  } else if (tone == "M") {
    stem
  } else {
    # Unknown trailing character: not a tone marker, keep the input intact.
    x
  }
}
# Apply tone accents to whole transliterated entries.
#
# Each element of `x` is split on single spaces into syllables, every syllable
# is passed through set_tone_word(), and the converted syllables are joined
# back together with "-".
#
# Args:
#   x: a character vector of space-separated syllables.
#
# Returns:
#   A character vector of the same length as `x` (one joined string per
#   element), so the result can be used directly as a data-frame column.
set_tone <- function(x) {
  vapply(
    str_split(x, " "),
    function(syllables) {
      toned <- vapply(
        syllables,
        set_tone_word,
        character(1),
        USE.NAMES = FALSE
      )
      str_c(toned, collapse = "-")
    },
    character(1)
  )
}
# Build the card table: accent the V2 column with set_tone(), merge V1 and V2
# into a single "thai" field of the form "<V1> = <V2>", and label the
# remaining V3 column "english".
words2 <- words %>%
  mutate(V2 = set_tone(V2)) %>%
  unite(thai, V1, V2, sep = " = ") %>%
  rename(english = V3)

# Deck with the thai field first: tab-separated, no header, no row names, no
# quoting.
write.table(
  words2,
  file = "top_1000_thai_words.tsv",
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)

# Same deck with the two columns swapped (english first).
write.table(
  select(words2, english, thai),
  file = "top_1000_thai_words_flipped.tsv",
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE,
  quote = FALSE
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment