Skip to content

Instantly share code, notes, and snippets.

@ahebrank
Last active August 29, 2015 14:25
Show Gist options
  • Save ahebrank/fc32dfafe0b855584815 to your computer and use it in GitHub Desktop.
Save ahebrank/fc32dfafe0b855584815 to your computer and use it in GitHub Desktop.
# Usage: source this file, then call
#
#   splitfile("workbook.xlsx", 1)
#
# to extract sheet 1 from workbook.xlsx into one text file per row.

# Settings -- change these if your workbook is laid out differently.
# subj_col: index of the column whose value names each output file
subj_col <- 1
# content_col: index of the column holding the text written to each file
content_col <- 2
# has_header: TRUE if the first row of the sheet is a header row
has_header <- TRUE
# output_directory: directory the text files are written to
output_directory <- "txt"

# library() stops with an error if gdata is not installed; require() would
# only warn and return FALSE, deferring the failure to the first read.xls()
# call.
library(gdata)
# Split one worksheet into one text file per row.
#
# Reads `sheet` of the workbook `filename` with read.xls() and, for every
# row, writes the cell in column `content_col` to
# "<output_directory>/<subject>.txt", where <subject> is the cell in column
# `subj_col`. Both cells are cleaned with trim_quotes() first. `subj_col`,
# `content_col`, `has_header` and `output_directory` are read from the
# script-level settings.
#
# Args:
#   filename: path to the workbook.
#   sheet: worksheet to extract, passed on to read.xls(); defaults to 1.
#
# Returns:
#   NULL, invisibly. Called for its side effects: creates `output_directory`
#   if it does not exist, writes one file per row and prints each output
#   path.
#
# Stops with an error if `subj_col` or `content_col` exceeds the number of
# columns in the sheet. Rows whose subject cell is missing or empty are
# skipped with a warning, because they have no usable file name.
splitfile <- function(filename, sheet = 1) {
  if (!dir.exists(output_directory)) {
    dir.create(output_directory)
  }

  x <- read.xls(filename, sheet, header = has_header, quote = '', method = 'tab')
  if (subj_col > ncol(x) || content_col > ncol(x)) {
    stop('Either subject col or content col are outside number of columns available')
  }

  # seq_len() yields an empty sequence for an empty sheet, whereas 1:0 would
  # run the loop body twice with invalid row indices.
  for (i in seq_len(nrow(x))) {
    sn <- trim_quotes(x[i, subj_col])
    if (is.na(sn) || !nzchar(sn)) {
      warning(sprintf('Row %d has no subject value; skipping', i), call. = FALSE)
      next
    }

    if (nchar(sn) > 17) {
      # Probably a bad conversion from numeric to string (a long run of
      # digits). Re-format it through as.numeric(), but only when the value
      # really is numeric: a long text subject must not collapse to "NA".
      num <- suppressWarnings(as.numeric(sn))
      if (!is.na(num)) {
        sn <- as.character(num)
      }
    }

    outfn <- sprintf('%s/%s.txt', output_directory, sn)
    print(outfn)
    # Passing the path lets writeLines() open and close the file itself, so
    # no connection is left open if the write fails.
    writeLines(as.character(trim_quotes(x[i, content_col])), outfn)
  }

  invisible(NULL)
}
# Strip the quoting that surrounds an exported cell value.
#
# Removes one double quote from the very start and one from the very end of
# each element of `cell`, then turns every escaped quote (a backslash
# followed by a double quote) back into a plain double quote. All other
# characters -- including parentheses, "^", "$" and lone backslashes -- are
# left untouched.
#
# Args:
#   cell: a vector of cell values; non-character input is coerced to
#     character by gsub().
#
# Returns:
#   A character vector of the same length as `cell`.
trim_quotes <- function(cell) {
  # Anchored alternation: only a quote at the first or last position matches.
  # (A bracketed class such as [(^")("$)] would instead delete every
  # parenthesis, caret, dollar sign and quote anywhere in the text.)
  x <- gsub('^"|"$', '', cell)
  # fixed = TRUE: the pattern is the literal two-character sequence \" .
  gsub('\\"', '"', x, fixed = TRUE)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment