azza-bazoo/profile-with-regmatches.md

## profile-with-regmatches.md

      
    Raw
  

              profile-with-regmatches.md
            
          
    > summaryRprof("Rprof.out")
$by.self
            self.time self.pct total.time total.pct
"substring"    232.76    99.87     232.76     99.87
"<GC>"           0.08     0.03       0.08      0.03
"gregexpr"       0.06     0.03       0.06      0.03
"sort.list"      0.06     0.03       0.06      0.03
"readLines"      0.04     0.02       0.04      0.02
"table"          0.02     0.01       0.08      0.03
"cat"            0.02     0.01       0.04      0.02
"paste"          0.02     0.01       0.02      0.01

$by.total
                      total.time total.pct self.time self.pct
"<Anonymous>"             232.80     99.89      0.00     0.00
"mapply"                  232.80     99.89      0.00     0.00
"regmatches"              232.78     99.88      0.00     0.00
"substring"               232.76     99.87    232.76    99.87
"Map"                     232.76     99.87      0.00     0.00
"as.data.frame"             0.10      0.04      0.00     0.00
"<GC>"                      0.08      0.03      0.08     0.03
"table"                     0.08      0.03      0.02     0.01
"factor"                    0.08      0.03      0.00     0.00
"gregexpr"                  0.06      0.03      0.06     0.03
"sort.list"                 0.06      0.03      0.06     0.03
"readLines"                 0.04      0.02      0.04     0.02
"cat"                       0.04      0.02      0.02     0.01
"paste"                     0.02      0.01      0.02     0.01
"[[.factor"                 0.02      0.01      0.00     0.00
"[["                        0.02      0.01      0.00     0.00
"as.data.frame.table"       0.02      0.01      0.00     0.00
"data.frame"                0.02      0.01      0.00     0.00
"do.call"                   0.02      0.01      0.00     0.00
"eval"                      0.02      0.01      0.00     0.00
"expand.grid"               0.02      0.01      0.00     0.00
"sprintf"                   0.02      0.01      0.00     0.00
"tolower"                   0.02      0.01      0.00     0.00

$sample.interval
[1] 0.02

$sampling.time
[1] 233.06


## profile-with-strsplit.md

      
    Raw
  

              profile-with-strsplit.md
            
          
    > summaryRprof("Rprof.out")
$by.self
                      self.time self.pct total.time total.pct
"strsplit"                 0.12    40.00       0.12     40.00
"readLines"                0.04    13.33       0.04     13.33
"sort.list"                0.04    13.33       0.04     13.33
"as.character"             0.02     6.67       0.04     13.33
"as.character.factor"      0.02     6.67       0.02      6.67
"expand.grid"              0.02     6.67       0.02      6.67
"tolower"                  0.02     6.67       0.02      6.67
"unique.default"           0.02     6.67       0.02      6.67

$by.total
                      total.time total.pct self.time self.pct
"strsplit"                  0.12     40.00      0.12    40.00
"as.data.frame"             0.08     26.67      0.00     0.00
"factor"                    0.06     20.00      0.00     0.00
"table"                     0.06     20.00      0.00     0.00
"readLines"                 0.04     13.33      0.04    13.33
"sort.list"                 0.04     13.33      0.04    13.33
"as.character"              0.04     13.33      0.02     6.67
"<Anonymous>"               0.04     13.33      0.00     0.00
"cat"                       0.04     13.33      0.00     0.00
"mapply"                    0.04     13.33      0.00     0.00
"sprintf"                   0.04     13.33      0.00     0.00
"as.character.factor"       0.02      6.67      0.02     6.67
"expand.grid"               0.02      6.67      0.02     6.67
"tolower"                   0.02      6.67      0.02     6.67
"unique.default"            0.02      6.67      0.02     6.67
"as.data.frame.table"       0.02      6.67      0.00     0.00
"data.frame"                0.02      6.67      0.00     0.00
"do.call"                   0.02      6.67      0.00     0.00
"eval"                      0.02      6.67      0.00     0.00
"unique"                    0.02      6.67      0.00     0.00

$sample.interval
[1] 0.02

$sampling.time
[1] 0.3


## topwords.R
#!/usr/bin/env Rscript

# Print the N most frequent words read from standard input.
#
# Usage: topwords.R N < input.txt
#   N  number of words to report (first trailing command-line argument).
#
# Output: one line per word, formatted "%8d %s" (count, then word), most
# frequent first. Profiling samples are written to "Rprof.out".

Rprof(filename = "Rprof.out", append = FALSE, interval = 0.02)

# Validate the word count up front: a missing or non-numeric argument would
# otherwise only surface later as an obscure error inside head().
num.words <- suppressWarnings(as.integer(commandArgs(trailingOnly = TRUE)[1]))
if (is.na(num.words)) {
  stop("usage: topwords.R <num.words> < input", call. = FALSE)
}

f <- file("stdin")
input.lines <- readLines(f)
close(f)
full.text <- tolower(paste(input.lines, collapse = " "))

# Alternative tokenizer, kept for reference. The profile recorded with it
# spent ~233 s almost entirely in substring() (called via regmatches), versus
# ~0.3 s total for the strsplit() version below.
#splits <- gregexpr("\\w+", full.text)
#words.all <- (regmatches(full.text, splits)[[1]])

# For Dummies (!) says to use strsplit:
# http://www.dummies.com/how-to/content/how-to-split-strings-in-r.html
# strsplit() returns a list with one element per input string; full.text is a
# single string, so take that element to get a plain character vector.
words.all <- strsplit(full.text, "\\W+")[[1]]
# Text that starts with a non-word character produces a leading "" token;
# drop it so the empty string is not counted as a word.
words.all <- words.all[nzchar(words.all)]

# Name the table dimension explicitly so the data frame column is exactly
# "words" (no reliance on `$` partial matching), and keep it as character.
words.unique <- as.data.frame(table(words = words.all),
                              stringsAsFactors = FALSE)
# order() on the negated counts sorts by descending frequency.
words.sorted <- words.unique[order(-words.unique$Freq), ]

# sprintf() is vectorised, so one call formats every line; sep = "" because
# each formatted line already ends in a newline.
top.words <- head(words.sorted, num.words)
cat(sprintf("%8d %s\n", top.words$Freq, top.words$words), sep = "")

# Stop profiling explicitly so Rprof.out is complete.
Rprof(NULL)
	#!/usr/bin/env Rscript

	# Print the N most frequent words read from standard input.
	#
	# Usage: topwords.R N < input.txt
	#   N  number of words to report (first trailing command-line argument).
	#
	# Output: one line per word, formatted "%8d %s" (count, then word), most
	# frequent first. Profiling samples are written to "Rprof.out".

	Rprof(filename = "Rprof.out", append = FALSE, interval = 0.02)

	# Validate the word count up front: a missing or non-numeric argument would
	# otherwise only surface later as an obscure error inside head().
	num.words <- suppressWarnings(as.integer(commandArgs(trailingOnly = TRUE)[1]))
	if (is.na(num.words)) {
	  stop("usage: topwords.R <num.words> < input", call. = FALSE)
	}

	f <- file("stdin")
	input.lines <- readLines(f)
	close(f)
	full.text <- tolower(paste(input.lines, collapse = " "))

	# Alternative tokenizer, kept for reference. The profile recorded with it
	# spent ~233 s almost entirely in substring() (called via regmatches), versus
	# ~0.3 s total for the strsplit() version below.
	#splits <- gregexpr("\\w+", full.text)
	#words.all <- (regmatches(full.text, splits)[[1]])

	# For Dummies (!) says to use strsplit:
	# http://www.dummies.com/how-to/content/how-to-split-strings-in-r.html
	# strsplit() returns a list with one element per input string; full.text is a
	# single string, so take that element to get a plain character vector.
	words.all <- strsplit(full.text, "\\W+")[[1]]
	# Text that starts with a non-word character produces a leading "" token;
	# drop it so the empty string is not counted as a word.
	words.all <- words.all[nzchar(words.all)]

	# Name the table dimension explicitly so the data frame column is exactly
	# "words" (no reliance on `$` partial matching), and keep it as character.
	words.unique <- as.data.frame(table(words = words.all),
	                              stringsAsFactors = FALSE)
	# order() on the negated counts sorts by descending frequency.
	words.sorted <- words.unique[order(-words.unique$Freq), ]

	# sprintf() is vectorised, so one call formats every line; sep = "" because
	# each formatted line already ends in a newline.
	top.words <- head(words.sorted, num.words)
	cat(sprintf("%8d %s\n", top.words$Freq, top.words$words), sep = "")

	# Stop profiling explicitly so Rprof.out is complete.
	Rprof(NULL)