kzfm/ML130727

## ML130727
library(ggplot2)
library(gridExtra)

###

set.seed(1)
x <-seq(-10, 10, by=0.01)
y <- 1 - x ^ 2 + rnorm(length(x), 0, 5)
g1 <- ggplot(data.frame(X=x,Y=y), aes(x=X, y=Y))+ geom_point() + geom_smooth(se = FALSE)
g2 <- ggplot(data.frame(X=x,Y=y), aes(x=X, y=Y))+ geom_point() + geom_smooth(method='lm', se=FALSE)
x.squared <- x ^ 2
g3 <- ggplot(data.frame(X=x.squared,Y=y), aes(x=X, y=Y))+ geom_point() + geom_smooth(method='lm', se = FALSE)

summary(lm(y ~ x))$r.squared
summary(lm(y ~ x.squared))$r.squared

grid.arrange(g1, g2, g3, ncol=2, nrow=2)

###

set.seed(1)
x <- seq(0, 1, by=0.01)
y <- sin(2 * pi * x) + rnorm(length(x), 0, 0.1)
df <- data.frame(X=x, Y=y)

ggplot(df, aes(x=X, y=Y)) + geom_point()
ggplot(df, aes(x=X, y=Y)) + geom_point() + geom_smooth(method='lm', se=FALSE)
ggplot(df, aes(x=X, y=Y)) + geom_point() + geom_smooth(se=FALSE)

poly.fit1 <- lm(Y ~ poly(X, degree = 1), data=df)
df1 <- transform(df, PredictedY = predict(poly.fit1))
gg1 <- ggplot(df1, aes(x=X, y=PredictedY)) + geom_point() + geom_line()

poly.fit3 <- lm(Y ~ poly(X, degree = 3), data=df)
df3 <- transform(df, PredictedY = predict(poly.fit3))
gg3 <- ggplot(df3, aes(x=X, y=PredictedY)) + geom_point() + geom_line()

poly.fit5 <- lm(Y ~ poly(X, degree = 5), data=df)
df5 <- transform(df, PredictedY = predict(poly.fit5))
gg5 <- ggplot(df5, aes(x=X, y=PredictedY)) + geom_point() + geom_line()

poly.fit25 <- lm(Y ~ poly(X, degree = 25), data=df)
df25 <- transform(df, PredictedY = predict(poly.fit25))
gg25 <- ggplot(df25, aes(x=X, y=PredictedY)) + geom_point() + geom_line()

grid.arrange(gg1, gg3, gg5, gg25, ncol=2, nrow=2)

###

set.seed(1)
x <- seq(0, 1, by=0.01)
y <- sin(2 * pi * x) + rnorm(length(x), 0 , 0.1)

n <- length(x)

indices <- sort(sample(1:n, round(0.5 * n)))

training.x <- x[indices]
training.y <- y[indices]

test.x <- x[-indices]
test.y <- y[-indices]

training.df <- data.frame(X=training.x, Y=training.y)
test.df <- data.frame(X=test.x, Y=test.y)

rmse <- function(y, h) { return(sqrt(mean((y-h)^2)))}

performance <- data.frame()

for (d in 1:12) {
  poly.fit <- lm(Y ~ poly(X, degree=d), data=training.df)
  performance <- rbind(performance,
                       data.frame(Degree=d,
                                  Data='Training',
                                  RMSE=rmse(training.y, predict(poly.fit))))

  performance <- rbind(performance,
                       data.frame(Degree=d,
                                  Data='Test',
                                  RMSE=rmse(test.y, predict(poly.fit, newdata=test.df))))
}

ggplot(performance, aes(x=Degree, y=RMSE, linetype=Data)) + geom_point() + geom_line()

### 6.2.1

set.seed(1)
x <- seq(0, 1, by=0.01)
y <- sin(2 * pi * x) + rnorm(length(x), 0 , 0.1)

x <- matrix(x)
library(glmnet)
glmnet(x, y)

n <- length(x)

indices <- sort(sample(1:n, round(0.5 * n)))

training.x <- x[indices]
training.y <- y[indices]

test.x <- x[-indices]
test.y <- y[-indices]

training.df <- data.frame(X=training.x, Y=training.y)
test.df <- data.frame(X=test.x, Y=test.y)

glmnet.fit <- with(training.df, glmnet(poly(X, degree=10), Y))
lambdas <- glmnet.fit$lambda
performance <- data.frame()

for (lambda in lambdas) {
  performance <- rbind(performance,
                       data.frame(Lambda=lambda,
                                  RMSE=rmse(test.y, with(test.df, predict(glmnet.fit,
                                                                          poly(X,
                                                                              degree=10),s=lambda)))))
}

ggplot(performance, aes(x=Lambda, y=RMSE)) + geom_point() + geom_line()

best.lambda <- with(performance, Lambda[which(RMSE == min(RMSE))])
glmnet.fit <- with(df, glmnet(poly(X, degree=10), Y))
coef(glmnet.fit, s=best.lambda)

###
# setwd("/Users//kzfm/lang/rcode/ML_for_Hackers//06-Regularization")

ranks <- read.csv("data/oreilly.csv", stringsAsFactors = FALSE)
library(tm)
documents <- data.frame(Text = ranks$Long.Desc.)
row.names(documents) <- 1:nrow(documents)

corpus <- Corpus(DataframeSource(documents))
corpus <- tm_map(corpus, tolower)
corpus <- tm_map(corpus, stripWhitespace)
corpus <- tm_map(corpus, removeWords, stopwords('english'))

dtm <- DocumentTermMatrix(corpus)
x <- as.matrix(dtm)
y <- rev(1:100)

set.seed(1)
library(glmnet)

performance <- data.frame()

for (lambda in c(0.1, 0.25, 0.5, 1, 2, 5))
{
  for (i in 1:50)
  {
    indices <- sample(1:100, 80)

    training.x <- x[indices, ]
    training.y <- y[indices]

    test.x <- x[-indices, ]
    test.y <- y[-indices]

    glm.fit <- glmnet(training.x, training.y)

    predicted.y <- predict(glm.fit, test.x, s = lambda)

    rmse <- sqrt(mean((predicted.y - test.y) ^ 2))

    performance <- rbind(performance,
                         data.frame(Lambda = lambda,
                                    Iteration = i,
                                    RMSE = rmse))
  }
}

ggplot(performance, aes(x = Lambda, y = RMSE)) +
  stat_summary(fun.data = 'mean_cl_boot', geom = 'errorbar') +
  stat_summary(fun.data = 'mean_cl_boot', geom = 'point')

###

y <- rep(c(1, 0), each = 50)
regularized.fit <- glmnet(x, y, family = 'binomial')
set.seed(1)
performance <- data.frame()

for (i in 1:250)
{
  indices <- sample(1:100, 80)

  training.x <- x[indices, ]
  training.y <- y[indices]

  test.x <- x[-indices, ]
  test.y <- y[-indices]

  for (lambda in c(0.0001, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.5, 0.1))
  {
    glm.fit <- glmnet(training.x, training.y, family = 'binomial')

    predicted.y <- ifelse(predict(glm.fit, test.x, s = lambda) > 0, 1, 0)

    error.rate <- mean(predicted.y != test.y)

    performance <- rbind(performance,
                         data.frame(Lambda = lambda,
                                    Iteration = i,
                                    ErrorRate = error.rate))
  }
}

ggplot(performance, aes(x = Lambda, y = ErrorRate)) +
  stat_summary(fun.data = 'mean_cl_boot', geom = 'errorbar') +
  stat_summary(fun.data = 'mean_cl_boot', geom = 'point') +
  scale_x_log10()

# chapter 7
heights.weights <- read.csv("data/01_heights_weights_genders.csv")

height.to.weight <- function(height, a, b){ return(a + b * height)}

ridge.error <- function(heights.weights, a, b, lambda){
  predictions <- with(heights.weights, height.to.weight(Height, a, b))
  errors <- with(heights.weights, Weight - predictions)
  return(sum(errors ^ 2) + lambda * (a^2+b^2))
}

lambda <- 1
optim(c(0,0),
      function (x) {ridge.error(heights.weights, x[1], x[2], lambda)}
      )
#$par
#[1] -340.434108    7.562524


lm(Weight ~ Height, data=heights.weights)
#Call:
#  lm(formula = Weight ~ Height, data = heights.weights)
#
#Coefficients:
#  (Intercept)       Height
#-350.737        7.717

a.ridge.error <- function(a, lambda) {return(ridge.error(heights.weights, a, 0, lambda))}
curve(sapply(x, function (a) {a.ridge.error(a, lambda)}), from = -1000, to = 1000)

b.ridge.error <- function(b, lambda) {return(ridge.error(heights.weights, 0, b, lambda))}
curve(sapply(x, function (b) {b.ridge.error(b, lambda)}), from = -1000, to = 1000)

### copy & paste :-)

english.letters <- c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
                     'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
                     'w', 'x', 'y', 'z')

caesar.cipher <- list()

inverse.caesar.cipher <- list()

for (index in 1:length(english.letters))
{
  caesar.cipher[[english.letters[index]]] <- english.letters[index %% 26 + 1]
  inverse.caesar.cipher[[english.letters[index %% 26 + 1]]] <- english.letters[index]
}

print(caesar.cipher)

# Fifteenth code snippet
apply.cipher.to.string <- function(string, cipher)
{
  output <- ''

  for (i in 1:nchar(string))
  {
    output <- paste(output, cipher[[substr(string, i, i)]], sep = '')
  }

  return(output)
}

apply.cipher.to.text <- function(text, cipher)
{
  output <- c()

  for (string in text)
  {
    output <- c(output, apply.cipher.to.string(string, cipher))
  }

  return(output)
}

apply.cipher.to.text(c('sample', 'text'), caesar.cipher)

# Sixteenth code snippet
generate.random.cipher <- function()
{
  cipher <- list()

  inputs <- english.letters

  outputs <- english.letters[sample(1:length(english.letters), length(english.letters))]

  for (index in 1:length(english.letters))
  {
    cipher[[inputs[index]]] <- outputs[index]
  }

  return(cipher)
}

modify.cipher <- function(cipher, input, output)
{
  new.cipher <- cipher

  new.cipher[[input]] <- output

  old.output <- cipher[[input]]

  collateral.input <- names(which(sapply(names(cipher),
                                         function (key) {cipher[[key]]}) == output))

  new.cipher[[collateral.input]] <- old.output

  return(new.cipher)
}

propose.modified.cipher <- function(cipher)
{
  input <- sample(names(cipher), 1)

  output <- sample(english.letters, 1)

  return(modify.cipher(cipher, input, output))
}

# Seventeenth code snippet
load(file.path('data', 'lexical_database.Rdata'))

# Eighteength code snippet
lexical.database[['a']]
lexical.database[['the']]
lexical.database[['he']]
lexical.database[['she']]
lexical.database[['data']]

# Nineteenth code snippet
one.gram.probability <- function(one.gram, lexical.database = list())
{
  lexical.probability <- lexical.database[[one.gram]]

  if (is.null(lexical.probability) || is.na(lexical.probability))
  {
    return(.Machine$double.eps)
  }
  else
  {
    return(lexical.probability)
  }
}

# Twentieth code snippet
log.probability.of.text <- function(text, cipher, lexical.database = list())
{
  log.probability <- 0.0

  for (string in text)
  {
    decrypted.string <- apply.cipher.to.string(string, cipher)
    log.probability <- log.probability +
      log(one.gram.probability(decrypted.string, lexical.database))
  }

  return(log.probability)
}

# Twenty-first code snippet
metropolis.step <- function(text, cipher, lexical.database = list())
{
  proposed.cipher <- propose.modified.cipher(cipher)

  lp1 <- log.probability.of.text(text, cipher, lexical.database)
  lp2 <- log.probability.of.text(text, proposed.cipher, lexical.database)

  if (lp2 > lp1)
  {
    return(proposed.cipher)
  }
  else
  {
    a <- exp(lp2 - lp1)
    x <- runif(1)

    if (x < a)
    {
      return(proposed.cipher)
    }
    else
    {
      return(cipher)
    }
  }
}

# Twenty-second code snippet
decrypted.text <- c('here', 'is', 'some', 'sample', 'text')

# Twenty-third code snippet
encrypted.text <- apply.cipher.to.text(decrypted.text, caesar.cipher)

# Twenty-fourth code snippet
set.seed(1)

cipher <- generate.random.cipher()

results <- data.frame()

number.of.iterations <- 50000

for (iteration in 1:number.of.iterations)
{
  log.probability <- log.probability.of.text(encrypted.text,
                                             cipher,
                                             lexical.database)

  current.decrypted.text <- paste(apply.cipher.to.text(encrypted.text,
                                                       cipher),
                                  collapse = ' ')

  correct.text <- as.numeric(current.decrypted.text == paste(decrypted.text,
                                                             collapse = ' '))

  results <- rbind(results,
                   data.frame(Iteration = iteration,
                              LogProbability = log.probability,
                              CurrentDecryptedText = current.decrypted.text,
                              CorrectText = correct.text))

  cipher <- metropolis.step(encrypted.text, cipher, lexical.database)
}

write.table(results,
            file = file.path('data/results.tsv'),
            row.names = FALSE,
            sep = '\t')
	library(ggplot2)
	library(gridExtra)

	###

	set.seed(1)
	x <-seq(-10, 10, by=0.01)
	y <- 1 - x ^ 2 + rnorm(length(x), 0, 5)
	g1 <- ggplot(data.frame(X=x,Y=y), aes(x=X, y=Y))+ geom_point() + geom_smooth(se = FALSE)
	g2 <- ggplot(data.frame(X=x,Y=y), aes(x=X, y=Y))+ geom_point() + geom_smooth(method='lm', se=FALSE)
	x.squared <- x ^ 2
	g3 <- ggplot(data.frame(X=x.squared,Y=y), aes(x=X, y=Y))+ geom_point() + geom_smooth(method='lm', se = FALSE)

	summary(lm(y ~ x))$r.squared
	summary(lm(y ~ x.squared))$r.squared

	grid.arrange(g1, g2, g3, ncol=2, nrow=2)

	###

	set.seed(1)
	x <- seq(0, 1, by=0.01)
	y <- sin(2 * pi * x) + rnorm(length(x), 0, 0.1)
	df <- data.frame(X=x, Y=y)

	ggplot(df, aes(x=X, y=Y)) + geom_point()
	ggplot(df, aes(x=X, y=Y)) + geom_point() + geom_smooth(method='lm', se=FALSE)
	ggplot(df, aes(x=X, y=Y)) + geom_point() + geom_smooth(se=FALSE)

	poly.fit1 <- lm(Y ~ poly(X, degree = 1), data=df)
	df1 <- transform(df, PredictedY = predict(poly.fit1))
	gg1 <- ggplot(df1, aes(x=X, y=PredictedY)) + geom_point() + geom_line()

	poly.fit3 <- lm(Y ~ poly(X, degree = 3), data=df)
	df3 <- transform(df, PredictedY = predict(poly.fit3))
	gg3 <- ggplot(df3, aes(x=X, y=PredictedY)) + geom_point() + geom_line()

	poly.fit5 <- lm(Y ~ poly(X, degree = 5), data=df)
	df5 <- transform(df, PredictedY = predict(poly.fit5))
	gg5 <- ggplot(df5, aes(x=X, y=PredictedY)) + geom_point() + geom_line()

	poly.fit25 <- lm(Y ~ poly(X, degree = 25), data=df)
	df25 <- transform(df, PredictedY = predict(poly.fit25))
	gg25 <- ggplot(df25, aes(x=X, y=PredictedY)) + geom_point() + geom_line()

	grid.arrange(gg1, gg3, gg5, gg25, ncol=2, nrow=2)

	###

	set.seed(1)
	x <- seq(0, 1, by=0.01)
	y <- sin(2 * pi * x) + rnorm(length(x), 0 , 0.1)

	n <- length(x)

	indices <- sort(sample(1:n, round(0.5 * n)))

	training.x <- x[indices]
	training.y <- y[indices]

	test.x <- x[-indices]
	test.y <- y[-indices]

	training.df <- data.frame(X=training.x, Y=training.y)
	test.df <- data.frame(X=test.x, Y=test.y)

	rmse <- function(y, h) { return(sqrt(mean((y-h)^2)))}

	performance <- data.frame()

	for (d in 1:12) {
	poly.fit <- lm(Y ~ poly(X, degree=d), data=training.df)
	performance <- rbind(performance,
	data.frame(Degree=d,
	Data='Training',
	RMSE=rmse(training.y, predict(poly.fit))))

	performance <- rbind(performance,
	data.frame(Degree=d,
	Data='Test',
	RMSE=rmse(test.y, predict(poly.fit, newdata=test.df))))
	}

	ggplot(performance, aes(x=Degree, y=RMSE, linetype=Data)) + geom_point() + geom_line()

	### 6.2.1

	set.seed(1)
	x <- seq(0, 1, by=0.01)
	y <- sin(2 * pi * x) + rnorm(length(x), 0 , 0.1)

	x <- matrix(x)
	library(glmnet)
	glmnet(x, y)

	n <- length(x)

	indices <- sort(sample(1:n, round(0.5 * n)))

	training.x <- x[indices]
	training.y <- y[indices]

	test.x <- x[-indices]
	test.y <- y[-indices]

	training.df <- data.frame(X=training.x, Y=training.y)
	test.df <- data.frame(X=test.x, Y=test.y)

	glmnet.fit <- with(training.df, glmnet(poly(X, degree=10), Y))
	lambdas <- glmnet.fit$lambda
	performance <- data.frame()

	for (lambda in lambdas) {
	performance <- rbind(performance,
	data.frame(Lambda=lambda,
	RMSE=rmse(test.y, with(test.df, predict(glmnet.fit,
	poly(X,
	degree=10),s=lambda)))))
	}

	ggplot(performance, aes(x=Lambda, y=RMSE)) + geom_point() + geom_line()

	best.lambda <- with(performance, Lambda[which(RMSE == min(RMSE))])
	glmnet.fit <- with(df, glmnet(poly(X, degree=10), Y))
	coef(glmnet.fit, s=best.lambda)

	###
	# setwd("/Users//kzfm/lang/rcode/ML_for_Hackers//06-Regularization")

	ranks <- read.csv("data/oreilly.csv", stringsAsFactors = FALSE)
	library(tm)
	documents <- data.frame(Text = ranks$Long.Desc.)
	row.names(documents) <- 1:nrow(documents)

	corpus <- Corpus(DataframeSource(documents))
	corpus <- tm_map(corpus, tolower)
	corpus <- tm_map(corpus, stripWhitespace)
	corpus <- tm_map(corpus, removeWords, stopwords('english'))

	dtm <- DocumentTermMatrix(corpus)
	x <- as.matrix(dtm)
	y <- rev(1:100)

	set.seed(1)
	library(glmnet)

	performance <- data.frame()

	for (lambda in c(0.1, 0.25, 0.5, 1, 2, 5))
	{
	for (i in 1:50)
	{
	indices <- sample(1:100, 80)

	training.x <- x[indices, ]
	training.y <- y[indices]

	test.x <- x[-indices, ]
	test.y <- y[-indices]

	glm.fit <- glmnet(training.x, training.y)

	predicted.y <- predict(glm.fit, test.x, s = lambda)

	rmse <- sqrt(mean((predicted.y - test.y) ^ 2))

	performance <- rbind(performance,
	data.frame(Lambda = lambda,
	Iteration = i,
	RMSE = rmse))
	}
	}

	ggplot(performance, aes(x = Lambda, y = RMSE)) +
	stat_summary(fun.data = 'mean_cl_boot', geom = 'errorbar') +
	stat_summary(fun.data = 'mean_cl_boot', geom = 'point')

	###

	y <- rep(c(1, 0), each = 50)
	regularized.fit <- glmnet(x, y, family = 'binomial')
	set.seed(1)
	performance <- data.frame()

	for (i in 1:250)
	{
	indices <- sample(1:100, 80)

	training.x <- x[indices, ]
	training.y <- y[indices]

	test.x <- x[-indices, ]
	test.y <- y[-indices]

	for (lambda in c(0.0001, 0.001, 0.0025, 0.005, 0.01, 0.025, 0.5, 0.1))
	{
	glm.fit <- glmnet(training.x, training.y, family = 'binomial')

	predicted.y <- ifelse(predict(glm.fit, test.x, s = lambda) > 0, 1, 0)

	error.rate <- mean(predicted.y != test.y)

	performance <- rbind(performance,
	data.frame(Lambda = lambda,
	Iteration = i,
	ErrorRate = error.rate))
	}
	}

	ggplot(performance, aes(x = Lambda, y = ErrorRate)) +
	stat_summary(fun.data = 'mean_cl_boot', geom = 'errorbar') +
	stat_summary(fun.data = 'mean_cl_boot', geom = 'point') +
	scale_x_log10()

	# chapter 7
	heights.weights <- read.csv("data/01_heights_weights_genders.csv")

	height.to.weight <- function(height, a, b){ return(a + b * height)}

	ridge.error <- function(heights.weights, a, b, lambda){
	predictions <- with(heights.weights, height.to.weight(Height, a, b))
	errors <- with(heights.weights, Weight - predictions)
	return(sum(errors ^ 2) + lambda * (a^2+b^2))
	}

	lambda <- 1
	optim(c(0,0),
	function (x) {ridge.error(heights.weights, x[1], x[2], lambda)}
	)
	#$par
	#[1] -340.434108 7.562524


	lm(Weight ~ Height, data=heights.weights)
	#Call:
	# lm(formula = Weight ~ Height, data = heights.weights)
	#
	#Coefficients:
	# (Intercept) Height
	#-350.737 7.717

	a.ridge.error <- function(a, lambda) {return(ridge.error(heights.weights, a, 0, lambda))}
	curve(sapply(x, function (a) {a.ridge.error(a, lambda)}), from = -1000, to = 1000)

	b.ridge.error <- function(b, lambda) {return(ridge.error(heights.weights, 0, b, lambda))}
	curve(sapply(x, function (b) {b.ridge.error(b, lambda)}), from = -1000, to = 1000)

	### copy & paste :-)

	english.letters <- c('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
	'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
	'w', 'x', 'y', 'z')

	caesar.cipher <- list()

	inverse.caesar.cipher <- list()

	for (index in 1:length(english.letters))
	{
	caesar.cipher[[english.letters[index]]] <- english.letters[index %% 26 + 1]
	inverse.caesar.cipher[[english.letters[index %% 26 + 1]]] <- english.letters[index]
	}

	print(caesar.cipher)

	# Fifteenth code snippet
	apply.cipher.to.string <- function(string, cipher)
	{
	output <- ''

	for (i in 1:nchar(string))
	{
	output <- paste(output, cipher[[substr(string, i, i)]], sep = '')
	}

	return(output)
	}

	apply.cipher.to.text <- function(text, cipher)
	{
	output <- c()

	for (string in text)
	{
	output <- c(output, apply.cipher.to.string(string, cipher))
	}

	return(output)
	}

	apply.cipher.to.text(c('sample', 'text'), caesar.cipher)

	# Sixteenth code snippet
	generate.random.cipher <- function()
	{
	cipher <- list()

	inputs <- english.letters

	outputs <- english.letters[sample(1:length(english.letters), length(english.letters))]

	for (index in 1:length(english.letters))
	{
	cipher[[inputs[index]]] <- outputs[index]
	}

	return(cipher)
	}

	modify.cipher <- function(cipher, input, output)
	{
	new.cipher <- cipher

	new.cipher[[input]] <- output

	old.output <- cipher[[input]]

	collateral.input <- names(which(sapply(names(cipher),
	function (key) {cipher[[key]]}) == output))

	new.cipher[[collateral.input]] <- old.output

	return(new.cipher)
	}

	propose.modified.cipher <- function(cipher)
	{
	input <- sample(names(cipher), 1)

	output <- sample(english.letters, 1)

	return(modify.cipher(cipher, input, output))
	}

	# Seventeenth code snippet
	load(file.path('data', 'lexical_database.Rdata'))

	# Eighteength code snippet
	lexical.database[['a']]
	lexical.database[['the']]
	lexical.database[['he']]
	lexical.database[['she']]
	lexical.database[['data']]

	# Nineteenth code snippet
	one.gram.probability <- function(one.gram, lexical.database = list())
	{
	lexical.probability <- lexical.database[[one.gram]]

	if (is.null(lexical.probability) \|\| is.na(lexical.probability))
	{
	return(.Machine$double.eps)
	}
	else
	{
	return(lexical.probability)
	}
	}

	# Twentieth code snippet
	log.probability.of.text <- function(text, cipher, lexical.database = list())
	{
	log.probability <- 0.0

	for (string in text)
	{
	decrypted.string <- apply.cipher.to.string(string, cipher)
	log.probability <- log.probability +
	log(one.gram.probability(decrypted.string, lexical.database))
	}

	return(log.probability)
	}

	# Twenty-first code snippet
	metropolis.step <- function(text, cipher, lexical.database = list())
	{
	proposed.cipher <- propose.modified.cipher(cipher)

	lp1 <- log.probability.of.text(text, cipher, lexical.database)
	lp2 <- log.probability.of.text(text, proposed.cipher, lexical.database)

	if (lp2 > lp1)
	{
	return(proposed.cipher)
	}
	else
	{
	a <- exp(lp2 - lp1)
	x <- runif(1)

	if (x < a)
	{
	return(proposed.cipher)
	}
	else
	{
	return(cipher)
	}
	}
	}

	# Twenty-second code snippet
	decrypted.text <- c('here', 'is', 'some', 'sample', 'text')

	# Twenty-third code snippet
	encrypted.text <- apply.cipher.to.text(decrypted.text, caesar.cipher)

	# Twenty-fourth code snippet
	set.seed(1)

	cipher <- generate.random.cipher()

	results <- data.frame()

	number.of.iterations <- 50000

	for (iteration in 1:number.of.iterations)
	{
	log.probability <- log.probability.of.text(encrypted.text,
	cipher,
	lexical.database)

	current.decrypted.text <- paste(apply.cipher.to.text(encrypted.text,
	cipher),
	collapse = ' ')

	correct.text <- as.numeric(current.decrypted.text == paste(decrypted.text,
	collapse = ' '))

	results <- rbind(results,
	data.frame(Iteration = iteration,
	LogProbability = log.probability,
	CurrentDecryptedText = current.decrypted.text,
	CorrectText = correct.text))

	cipher <- metropolis.step(encrypted.text, cipher, lexical.database)
	}

	write.table(results,
	file = file.path('data/results.tsv'),
	row.names = FALSE,
	sep = '\t')