Skip to content

Instantly share code, notes, and snippets.

@OterLabb
Created December 3, 2019 20:45
Show Gist options
  • Save OterLabb/8e3a41d4ac28a27f5113af32233d39c5 to your computer and use it in GitHub Desktop.
Save OterLabb/8e3a41d4ac28a27f5113af32233d39c5 to your computer and use it in GitHub Desktop.
Some text sorting
library(stringr)
# Load the exported attribute table. `stringsAsFactors = FALSE` keeps text
# columns as character vectors on every R version (before R 4.0 read.csv()
# converted them to factors by default).
grav_roys <- read.csv(
  "G:/New Folder (18)/rays_haug.csv",
  encoding = "UTF-8",
  sep = ",",
  stringsAsFactors = FALSE
)
str(grav_roys)

# Working copy holding only the two id columns and the free-text
# `informasjo` column.
myvars <- c("OBJECTID", "objid", "informasjo")
newdata <- grav_roys[myvars]

# Preview of the first rows for interactive inspection.
# NOTE(review): this variable shares its name with base::subset(); calls to
# subset() still resolve to the function, but a different name would be
# clearer.
subset <- head(grav_roys)
subset
str(subset)

# Scratch check that the digit regex behaves as expected on a string without
# digits (prints "Not True"). grepl() already returns a logical, so no
# comparison with TRUE is needed.
if (grepl("\\d", "nteionrasop")) {
  print("True")
} else {
  print("Not True")
}

# Row indices of the preview; seq_len() yields an empty sequence for a
# zero-row frame where 1:nrow() would yield c(1, 0).
seq_len(nrow(subset))
# Derive the hoyde, maal, diameter and type columns from the free-text
# `informasjo` column, calling the matching extractor once per row.
#
# NOTE: hoydeFunc, maalFunc, diamFunc and typeFunc are assigned further down
# in this file, so those definitions must be evaluated before this section.
#
# vapply() replaces the `1:nrow(newdata)` loops: it handles a zero-row table,
# avoids re-assigning the data frame column once per row, and fixes each
# column's type up front. The as.numeric()/as.character() wrappers turn a bare
# logical `NA` returned by an extractor into the NA of the column's type.
infoText <- as.character(newdata$informasjo)

# Height
newdata$hoyde <- vapply(
  infoText,
  function(txt) as.numeric(hoydeFunc(txt)),
  numeric(1),
  USE.NAMES = FALSE
)

# Measurement
newdata$maal <- vapply(
  infoText,
  function(txt) as.character(maalFunc(txt)),
  character(1),
  USE.NAMES = FALSE
)

# Diameter
newdata$diameter <- vapply(
  infoText,
  function(txt) as.character(diamFunc(txt)),
  character(1),
  USE.NAMES = FALSE
)

# Type
newdata$type <- vapply(
  infoText,
  function(txt) as.character(typeFunc(txt)),
  character(1),
  USE.NAMES = FALSE
)
#' Extract a height value from free text.
#'
#' Looks for the first number that follows either the word "høyde" or an
#' "h" + one character prefix (e.g. "h. "), case-insensitively, and returns
#' it as a number.
#'
#' @param orgText Character vector (or factor) of description texts.
#' @return Numeric vector of the same length as `orgText`; `NA` where no
#'   height could be found or the input is `NA`.
hoydeFunc <- function(orgText) {
  pat <- '(?i)(^.*høyde.*?|h. *?)(\\d+(?:[\\.\\,]\\d{1,2})?).*'

  # Line breaks are flattened to spaces before matching so that the `.`
  # wildcards in the pattern can run across what used to be several lines.
  flat <- gsub("[\r\n]", " ", orgText)

  # Column 1 is the full match, column 2 the keyword prefix, column 3 the
  # number itself.
  captured <- stringr::str_match(flat, pat)[, 3]

  # The capture holds at most one separator followed by one or two digits, so
  # a dot in it is a decimal point and must be kept; only a decimal comma has
  # to be converted. (Deleting dots would turn "1.5" into 15.)
  as.numeric(sub(",", ".", captured, fixed = TRUE))
}
#' Extract the first number that follows the keyword "mål" in free text.
#'
#' Matching is case-insensitive. The number is returned as text exactly as
#' written (e.g. with a decimal comma); no numeric conversion happens here.
#'
#' @param orgText Character vector (or factor) of description texts.
#' @return Character vector of the same length as `orgText`; `NA` where the
#'   keyword/number pair is absent or the input is `NA`.
maalFunc <- function(orgText) {
  pat <- '(?i)^.*mål.*?(\\d+(?:[\\.\\,\\xx]\\d{1,2})?).*'

  text <- as.character(orgText)
  result <- rep(NA_character_, length(text))

  # Test for a match explicitly instead of comparing the substituted string
  # with the input: that comparison yields NA for missing text, which makes
  # `if` fail with "missing value where TRUE/FALSE needed".
  matched <- !is.na(text) & grepl(pat, text)
  result[matched] <- gsub(pat, '\\1', text[matched])
  result
}
#' Extract the first number that follows the keyword "diam" in free text.
#'
#' Matching is case-insensitive, so "Diameter", "diam." etc. all qualify. The
#' number is returned as text exactly as written (e.g. with a decimal comma);
#' no numeric conversion happens here.
#'
#' @param orgText Character vector (or factor) of description texts.
#' @return Character vector of the same length as `orgText`; `NA` where the
#'   keyword/number pair is absent or the input is `NA`.
diamFunc <- function(orgText) {
  pat <- '(?i)^.*diam.*?(\\d+(?:[\\.\\,\\xx]\\d{1,2})?).*'

  text <- as.character(orgText)
  result <- rep(NA_character_, length(text))

  # Test for a match explicitly instead of comparing the substituted string
  # with the input: that comparison yields NA for missing text, which makes
  # `if` fail with "missing value where TRUE/FALSE needed".
  matched <- !is.na(text) & grepl(pat, text)
  result[matched] <- gsub(pat, '\\1', text[matched])
  result
}
# Scratch regex fragment kept for reference (not valid R on its own): (?:s|rund.*)
#' Extract the monument type keyword from free text.
#'
#' Returns the first whole-word, case-insensitive occurrence of "rundhaug",
#' "rundrøys", "langhaug" or "rund haug", spelled as it appears in the text.
#'
#' @param orgText Character vector (or factor) of description texts.
#' @return Character vector of the same length as `orgText`; `NA` where none
#'   of the keywords occurs or the input is `NA`.
typeFunc <- function(orgText) {
  pat <- '(?i)\\b(rundhaug|rundrøys|langhaug|rund haug)\\b'

  # None of the alternatives contains a digit, so a match never needs a
  # follow-up digit check; str_extract() already yields NA for no match.
  stringr::str_extract(as.character(orgText), pat)
}
str(newdata)

# Convert a column of extracted measurements to numbers.
#
# Values that are already numeric are returned untouched: sending them through
# a text round trip that deletes dots would turn 1.5 into 15. For text values
# only the decimal comma is converted; the extraction patterns capture at most
# one separator followed by one or two digits, so a dot is a decimal point and
# must be kept. Text that still is not a number becomes NA, with R's usual
# coercion warning.
parseDecimal <- function(values) {
  if (is.numeric(values)) {
    return(values)
  }
  as.numeric(sub(",", ".", as.character(values), fixed = TRUE))
}

newdata$hoyde <- parseDecimal(newdata$hoyde)
newdata$diameter <- parseDecimal(newdata$diameter)
write.csv(newdata, file = 'G:/kulturminner/New Folder (18)/sorted.csv')

# Second export: only rows with a known height above 0.2. which() drops the
# rows where the comparison is NA. `diameter` is already numeric at this
# point, so it is not converted a second time.
newdata2 <- newdata[which(newdata$hoyde > 0.2), ]
write.csv(newdata2, file = 'G:/kulturminner/New Folder (18)/sorted2.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment