Created
December 3, 2019 20:45
-
-
Save OterLabb/8e3a41d4ac28a27f5113af32233d39c5 to your computer and use it in GitHub Desktop.
Some text sorting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(stringr)

# Read the source table; the file is declared as UTF-8 and comma-separated.
grav_roys <- read.csv(
  "G:/New Folder (18)/rays_haug.csv",
  encoding = "UTF-8",
  sep = ","
)
str(grav_roys)

# Keep only the two id columns and the free-text column for further work.
myvars <- c("OBJECTID", "objid", "informasjo")
newdata <- grav_roys[myvars]

# Look at the first rows of the raw table.
# NOTE(review): the name `subset` masks base::subset() for the rest of the
# session; it is kept here only because other code may refer to it.
subset <- head(grav_roys)
subset
str(subset)

# Scratch check: does a digit-free sample string contain any digit?
if (isTRUE(grepl("\\d", "nteionrasop"))) {
  print("True")
} else {
  print("Not True")
}

# Row indices of the preview table.
1:nrow(subset)
# Derive four new columns from the free-text `informasjo` column.
#
# NOTE: hoydeFunc(), maalFunc(), diamFunc() and typeFunc() are defined further
# down in this file, so they must already exist in the session before this
# section is run.
#
# vapply() replaces the original `for (row in 1:nrow(newdata))` loops:
#   * `1:nrow(x)` yields c(1, 0) for an empty table and then fails on
#     assignment, whereas vapply() simply returns a zero-length vector;
#   * assigning `newdata$col[row]` one element at a time copies the column on
#     every iteration.
# Each helper may return a bare logical NA, so its result is coerced to the
# column type before vapply() checks it.
info_text <- as.character(newdata$informasjo)

# Height
newdata$hoyde <- vapply(
  info_text,
  function(txt) as.numeric(hoydeFunc(txt)),
  numeric(1),
  USE.NAMES = FALSE
)

# Measurements
newdata$maal <- vapply(
  info_text,
  function(txt) as.character(maalFunc(txt)),
  character(1),
  USE.NAMES = FALSE
)

# Diameter
newdata$diameter <- vapply(
  info_text,
  function(txt) as.character(diamFunc(txt)),
  character(1),
  USE.NAMES = FALSE
)

# Type
newdata$type <- vapply(
  info_text,
  function(txt) as.character(typeFunc(txt)),
  character(1),
  USE.NAMES = FALSE
)
# Extract a height value from one free-text description.
#
# The text is searched (case-insensitively) for the word "høyde", or for an
# "h" followed by one arbitrary character, and the first number after it is
# returned. The number may carry one or two decimals written with either
# "," or ".".
#
# orgText: a single text value (requires the stringr package for str_match()).
# Returns: the height as a numeric value, or NA when no number is found.
hoydeFunc <- function(orgText) {
  # NOTE(review): the "." in `h. *?` is unescaped and therefore matches any
  # character, not just a literal full stop - confirm that this is intended.
  pat <- '(?i)(^.*høyde.*?|h. *?)(\\d+(?:[\\.\\,]\\d{1,2})?).*'

  # Flatten line breaks to spaces before matching.
  tmp <- gsub("[\r\n]", " ", orgText)

  # Column 3 of the str_match() result is the second capture group: the number.
  hoyde_txt <- str_match(tmp, pat)[, 3]

  # The pattern only ever captures "digits[.,]digits", so a "." here is a
  # decimal point. The previous code deleted it as if it were a thousands
  # separator, which turned "1.5" into 15. Only the decimal comma needs
  # converting.
  hoyde <- as.numeric(sub(",", ".", hoyde_txt, fixed = TRUE))

  if (is.na(hoyde)) {
    return(NA)
  }
  hoyde
}
# Extract the first number that follows the word "mål" in one free-text
# description.
#
# orgText: a single text value (length 1).
# Returns: the matched number as a character string exactly as written in the
#   text (e.g. "5" or "5,5"), or NA when the text is NA or contains no "mål"
#   followed by a number.
maalFunc <- function(orgText) {
  stopifnot(length(orgText) == 1L)

  # An NA text previously crashed in `if (orgText == maal)` with
  # "missing value where TRUE/FALSE needed"; there is nothing to extract.
  if (is.na(orgText)) {
    return(NA)
  }

  # NOTE(review): the separator class also contains "x" (and a backslash),
  # presumably so that dimensions written like "5x3" are captured - confirm.
  pat <- '(?i)^.*mål.*?(\\d+(?:[\\.\\,\\xx]\\d{1,2})?).*'
  maal <- gsub(pat, '\\1', orgText)

  # gsub() hands the text back unchanged when the pattern does not match.
  # A successful match always yields at least one digit, so no further digit
  # check is needed.
  if (orgText == maal) {
    return(NA)
  }
  maal
}
# Extract the first number that follows "diam" in one free-text description.
#
# orgText: a single text value (length 1).
# Returns: the matched number as a character string exactly as written in the
#   text (e.g. "8" or "8,5"), or NA when the text is NA or contains no "diam"
#   followed by a number.
diamFunc <- function(orgText) {
  stopifnot(length(orgText) == 1L)

  # An NA text previously crashed in `if (orgText == diameter)` with
  # "missing value where TRUE/FALSE needed"; there is nothing to extract.
  if (is.na(orgText)) {
    return(NA)
  }

  # NOTE(review): the separator class also contains "x" (and a backslash),
  # presumably so that dimensions written like "5x3" are captured - confirm.
  pat <- '(?i)^.*diam.*?(\\d+(?:[\\.\\,\\xx]\\d{1,2})?).*'
  diameter <- gsub(pat, '\\1', orgText)

  # gsub() hands the text back unchanged when the pattern does not match.
  # A successful match always yields at least one digit, so no further digit
  # check is needed.
  if (orgText == diameter) {
    return(NA)
  }
  diameter
}
# Stray regex fragment (not valid R on its own): (?:s|rund.*)
# Find the first type keyword in one free-text description.
#
# The keywords searched for (case-insensitively, as whole words) are
# "rundhaug", "rundrøys", "langhaug" and "rund haug".
#
# orgText: a single text value (requires the stringr package for str_extract()).
# Returns: the keyword exactly as it is written in the text, or NA when none
#   of the keywords occurs.
typeFunc <- function(orgText) {
  type_regex <- '(?i)\\b(rundhaug|rundrøys|langhaug|rund haug)\\b'
  found <- str_extract(orgText, type_regex)

  # Guard clause: nothing matched.
  if (is.na(found)) {
    return(NA)
  }

  # Accept the hit only when it carries no digit.
  if (!grepl('\\d', found)) found else NA
}
str(newdata)

# Convert a column of extracted values to numeric.
#
# Values that are already numeric are returned untouched: R prints numbers with
# a "." decimal mark, so pushing them through the text clean-up again (as the
# previous code did) stripped that "." and turned 0.5 into 5. Text values may
# use "," or "." as the decimal mark; only the comma needs converting.
# Text that is not a plain number becomes NA, with R's usual coercion warning.
parse_decimal <- function(x) {
  if (is.numeric(x)) {
    return(x)
  }
  as.numeric(sub(",", ".", as.character(x), fixed = TRUE))
}

newdata$hoyde <- parse_decimal(newdata$hoyde)
newdata$diameter <- parse_decimal(newdata$diameter)

write.csv(newdata, file = 'G:/kulturminner/New Folder (18)/sorted.csv')

# Keep only the rows whose height exceeds 0.2; which() drops rows with NA.
# `diameter` is already numeric at this point, so it is not parsed a second
# time (doing so turned 2.5 into 25 in the second export).
newdata2 <- newdata[which(newdata$hoyde > 0.2), ]
write.csv(newdata2, file = 'G:/kulturminner/New Folder (18)/sorted2.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment