Skip to content

Instantly share code, notes, and snippets.

@wush978
Created December 3, 2012 07:32
Show Gist options
  • Save wush978/4193405 to your computer and use it in GitHub Desktop.
Save wush978/4193405 to your computer and use it in GitHub Desktop.
remove unicode escape
source("hex2str.R")
#' Replace `\uXXXX` escape sequences in a string with the characters they encode.
#'
#' Every occurrence of a backslash, the letter `u` and four hexadecimal digits
#' is decoded by passing the four digits to `hex2str()` and converting the
#' result from UTF-16BE to UTF-8 with `iconv()`. One trace line of the form
#' `hex --> char` is printed with `cat()` for each distinct escape.
#'
#' @param src A single character string that may contain `\uXXXX` escapes.
#' @return `src` with every escape replaced by its decoded character. A string
#'   without escapes is returned unchanged.
#' @details Signals an error when `iconv()` returns `NA` for an escape.
#'
#'   NOTE(review): each escape is decoded on its own, so a UTF-16 surrogate
#'   pair written as two consecutive escapes is presumably rejected by
#'   `iconv()` -- confirm against `hex2str()` before relying on it.
remove_unicode_escape <- function(src) {
  stopifnot(is.character(src), length(src) == 1L)

  # Match on characters, not bytes, so the offsets stay valid when `src`
  # already contains multibyte characters ahead of an escape.
  matches <- gregexpr("\\\\u[0-9a-fA-F]{4,4}", src, perl = FALSE)
  found <- regmatches(src, matches)[[1]]

  # No escapes (gregexpr reported -1): nothing to decode.
  if (length(found) == 0L) {
    return(src)
  }

  # Decode each distinct escape once, last occurrence first, which keeps the
  # order of the trace output.
  escapes <- unique(rev(found))
  decoded <- character(length(escapes))
  names(decoded) <- escapes
  for (i in seq_along(escapes)) {
    hex <- substring(escapes[i], 3, 6)
    result <- iconv(hex2str(hex), "UTF-16BE", "UTF-8")
    cat(paste(hex, "-->", result, "\n"))
    if (is.na(result)) {
      stop("cannot decode unicode escape \\u", hex, call. = FALSE)
    }
    decoded[i] <- result
  }

  # Substitute all occurrences in one pass so that repeated escapes are all
  # replaced and already-decoded text is never re-scanned.
  regmatches(src, matches) <- list(unname(decoded[found]))
  src
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment