stevenworthington/caliper_text.R

## caliper_text.R
# set working directory
dir_path <-  "path_to_text_files"
setwd(dir_path)

# create vector of filenames
filenames <- list.files(dir_path)

# read in files to a list
docList <- lapply(filenames, scan, what = "character", sep = "\n")

# function to extract a text block by pattern matching,
# plus/minus some caliper (measured in units of lines)
caliper.text <- function(text, pattern, caliper) {
    idx <- regexpr(pattern = pattern, text = text)
    if (any(idx > 0)) {
        idx_pos <- which(idx > 0)
        extract <- text[min(idx_pos - caliper) : max(idx_pos + caliper)]
        return(extract)
    } else {
        message("Pattern is matched zero times")
    }
}

# example patterns, separated by pipes "|"
match_text1 <- "president|public"
match_text2 <- "president|Congressman"
match_text3 <- "sequestration|technological"
match_text4 <- "military actions|troops|surge"

# example function calls
test1 <- lapply(docList, caliper.text, pattern = match_text1, caliper = 2)
test2 <- lapply(docList, caliper.text, pattern = match_text2, caliper = 2)
test3 <- lapply(docList, caliper.text, pattern = match_text3, caliper = 2)
test4 <- lapply(docList, caliper.text, pattern = match_text4, caliper = 2)
	# set working directory
	dir_path <- "path_to_text_files"
	setwd(dir_path)

	# create vector of filenames
	filenames <- list.files(dir_path)

	# read in files to a list
	docList <- lapply(filenames, scan, what = "character", sep = "\n")

	# function to extract a text block by pattern matching,
	# plus/minus some caliper (measured in units of lines)
	caliper.text <- function(text, pattern, caliper) {
	idx <- regexpr(pattern = pattern, text = text)
	if (any(idx > 0)) {
	idx_pos <- which(idx > 0)
	extract <- text[min(idx_pos - caliper) : max(idx_pos + caliper)]
	return(extract)
	} else {
	message("Pattern is matched zero times")
	}
	}

	# example patterns, separated by pipes "\|"
	match_text1 <- "president\|public"
	match_text2 <- "president\|Congressman"
	match_text3 <- "sequestration\|technological"
	match_text4 <- "military actions\|troops\|surge"

	# example function calls
	test1 <- lapply(docList, caliper.text, pattern = match_text1, caliper = 2)
	test2 <- lapply(docList, caliper.text, pattern = match_text2, caliper = 2)
	test3 <- lapply(docList, caliper.text, pattern = match_text3, caliper = 2)
	test4 <- lapply(docList, caliper.text, pattern = match_text4, caliper = 2)