Created
June 29, 2015 16:19
-
-
Save djhurio/7126a428ab91cf86811f to your computer and use it in GitHub Desktop.
Izvelk ciema vai pilsētas nosaukumu no adreses
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Extract the village or town name from an address string
#'
#' Each address is lower-cased and split on ", " into fragments. Fragments
#' that contain " nov", " pag", a double quote, a digit or a dot are
#' discarded (presumably municipality/parish parts, quoted house names,
#' street numbers and postal codes -- confirm against real data). The last
#' remaining fragment is taken as the place name, a trailing "ā" is replaced
#' with "a", and the first letter is upper-cased.
#'
#' @param adr_txt Character vector of addresses whose parts are separated
#'   by ", ".
#' @return Character vector with one element per address: the extracted
#'   name with a capitalised first letter, or `NA` when no fragment
#'   qualifies.
extr.ciems <- function(adr_txt) {
  # Convert everything to lower case
  adr_txt <- tolower(adr_txt)

  # Text fragments that disqualify an address part
  pattern <- " nov| pag|\"|[0-9]|\\."

  # Split each address into parts, keep the valid ones, and take the last
  # one. Indexing an empty result with [1] yields NA, which trimws() keeps.
  # vapply() guarantees a character vector even for zero-length input.
  x <- vapply(
    strsplit(adr_txt, split = ", "),
    function(txt) {
      valid <- grep(pattern, txt, value = TRUE, invert = TRUE)
      trimws(rev(valid)[1])
    },
    character(1)
  )

  # Fix the case where the name ends with "ā"
  y <- gsub("ā$", "a", x)

  # Upper-case the first letter; sub() leaves NA elements as NA
  sub("^(.)", "\\U\\1", y, perl = TRUE)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment