# djo/problem1.rb

## problem1.rb
# Problem 1 of 3: Math
# ====================
#
# n! means n * (n - 1) * ... * 3 * 2 * 1
# For example, 10! = 10 * 9 * ... * 3 * 2 * 1 = 3628800
# Let R(n) equal the sum of the digits in the number n!
# For example, R(10) is 3 + 6 + 2 + 8 + 8 + 0 + 0 = 27.
# Find the lowest value for n where R(n) is 8001.
#
# Result: 787.

# Computes the factorial of +num+, scaled by an optional starting accumulator.
#
# @param num [Integer] non-negative integer whose factorial is wanted
# @param acc [Integer] value the product starts from (defaults to 1), so the
#   result is acc * num!
# @return [Integer] acc * num!; fact(0) and fact(1) both return acc
# @raise [ArgumentError] if num is negative
def fact(num, acc = 1)
  raise ArgumentError, "num must be non-negative, got #{num}" if num < 0

  # Iterating avoids the endless recursion the lone `num == 1` base case
  # caused for 0, and keeps the stack depth constant for large inputs.
  (2..num).inject(acc) { |product, factor| product * factor }
end

# Adds up the decimal digits of +num+.
#
# The value is rendered with to_s and every character is converted with to_i,
# so a non-digit character such as a leading "-" contributes 0.
#
# @param num [#to_s] value whose digits are summed
# @return [Integer] sum of the digits
def sum(num)
  num.to_s.each_char.inject(0) { |total, digit| total + digit.to_i }
end

# Search upward from n = 10 for the first n whose factorial has a digit sum
# of 8001, then print that n. The loop has no upper bound.
i = 10
# Carry n! forward and multiply by the next n on each step rather than
# rebuilding the whole factorial from scratch for every candidate.
factorial = fact(i)

until sum(factorial) == 8001
  i += 1
  factorial *= i
end

p i

## problem2.rb
# Problem 2 of 3: HTML
# ====================
#
# One way to exclude miscellaneous text from an article is to find the standard deviation
# of the depth of the <p> tags for the <article>. For http://apply.embed.ly/static/data/2.html
# find the standard deviation of all the <p> tags within the <article> tag. Round to the nearest tenth: X.X.
#
# Result: 1.4.

require 'nokogiri'
require 'open-uri'

# Counts how many parent steps separate +element+ from an ancestor named
# 'article'.
#
# @param element [#parent] node to start from; each node reached must respond
#   to parent, and each parent to name
# @param num [Integer] count reported when the immediate parent is the article
#   (defaults to 1); every further level climbed adds 1
# @return [Integer] num plus the number of levels climbed
# @raise [NoMethodError] if the chain of parents runs out (parent returns nil)
#   before a node named 'article' is found
def depth(element, num = 1)
  node = element
  level = num

  until node.parent.name == 'article'
    node = node.parent
    level += 1
  end

  level
end

# Downloads the sample page and prints the population standard deviation of
# the <p> depths inside <article>, rounded to one decimal place.
s = 'http://apply.embed.ly/static/data/2.html'
# Kernel#open stopped opening URLs in Ruby 3.0; open-uri provides URI.open.
doc = Nokogiri::HTML(URI.open(s))

# Only <p> tags inside an <article> are measured, as the task requires; a <p>
# without an article ancestor would also make depth fail.
res = doc.xpath('//article//p').map { |el| depth(el) }
#=> [1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 3, 1]

# inject yields the running total first and the current element second.
avg = res.inject(0) { |memo, n| memo + n }.to_f / res.size
diffs = res.map { |n| (n - avg) * (n - avg) }

# Square root of the mean squared deviation, rounded to the nearest tenth.
p Math.sqrt(diffs.inject(0) { |memo, n| memo + n }.to_f / res.size).round(1)

## problem3.rb
# Problem 3 of 3: Zipf's law
# ==========================
#
# A simplified version of Zipf's law:
#  "For a given body of text, the most frequent word will occur approximately twice as often
#   as the second most frequent word, three times as often as the third most frequent word, etc.
#   [x, x/2, x/3, x/4, x/5, ...]"
# The following is a frequency set of words in a body of text that follows Zipf's law:
#  [
#    ('the', 2520),
#    ('of', 1260),
#    ('and', 840),
#    ('a', 630),
#    ('to', 504)
#    ...
#  ]
# Given that the text has 900 unique words, how many unique words, starting with the most frequently used word, make up half the text?
#
# Result: 22.

# Total number of words in the text: the word at rank k (1-based) occurs
# round(num / k) times, summed over all 900 unique words.
count = 0
num = 2520
frequencies = (1..900).map { |rank| (num.to_f / rank).round }
frequencies.each { |frequency| count += frequency }

half = (count / 2.0).round
sum = 0
res = 0

# Take words from most to least frequent until they cover half the text.
frequencies.each do |frequency|
  break if sum >= half

  sum += frequency
  res += 1
end

p res
	# Problem 1 of 3: Math
	# ====================
	#
	# n! means n * (n - 1) * ... * 3 * 2 * 1
	# For example, 10! = 10 * 9 * ... * 3 * 2 * 1 = 3628800
	# Let R(n) equal the sum of the digits in the number n!
	# For example, R(10) is 3 + 6 + 2 + 8 + 8 + 0 + 0 = 27.
	# Find the lowest value for n where R(n) is 8001.
	#
	# Result: 787.

	# Computes the factorial of +num+, scaled by an optional starting accumulator.
	#
	# @param num [Integer] non-negative integer whose factorial is wanted
	# @param acc [Integer] value the product starts from (defaults to 1), so the
	#   result is acc * num!
	# @return [Integer] acc * num!; fact(0) and fact(1) both return acc
	# @raise [ArgumentError] if num is negative
	def fact(num, acc = 1)
	  raise ArgumentError, "num must be non-negative, got #{num}" if num < 0

	  # Iterating avoids the endless recursion the lone `num == 1` base case
	  # caused for 0, and keeps the stack depth constant for large inputs.
	  (2..num).inject(acc) { |product, factor| product * factor }
	end

	# Adds up the decimal digits of +num+.
	#
	# The value is rendered with to_s and every character is converted with to_i,
	# so a non-digit character such as a leading "-" contributes 0.
	#
	# @param num [#to_s] value whose digits are summed
	# @return [Integer] sum of the digits
	def sum(num)
	  # Block parameters use plain pipes; the escaped `\|` form is not valid Ruby.
	  num.to_s.each_char.inject(0) { |total, digit| total + digit.to_i }
	end

	# Search upward from n = 10 for the first n whose factorial has a digit sum
	# of 8001, then print that n. The loop has no upper bound.
	i = 10
	# Carry n! forward and multiply by the next n on each step rather than
	# rebuilding the whole factorial from scratch for every candidate.
	factorial = fact(i)

	until sum(factorial) == 8001
	  i += 1
	  factorial *= i
	end

	p i
	# Problem 2 of 3: HTML
	# ====================
	#
	# One way to exclude miscellaneous text from an article is to find the standard deviation
	# of the depth of the <p> tags for the <article>. For http://apply.embed.ly/static/data/2.html
	# find the standard deviation of all the <p> tags within the <article> tag. Round to the nearest tenth: X.X.
	#
	# Result: 1.4.

	require 'nokogiri'
	require 'open-uri'

	# Counts how many parent steps separate +element+ from an ancestor named
	# 'article'.
	#
	# @param element [#parent] node to start from; each node reached must respond
	#   to parent, and each parent to name
	# @param num [Integer] count reported when the immediate parent is the article
	#   (defaults to 1); every further level climbed adds 1
	# @return [Integer] num plus the number of levels climbed
	# @raise [NoMethodError] if the chain of parents runs out (parent returns nil)
	#   before a node named 'article' is found
	def depth(element, num = 1)
	  node = element
	  level = num

	  until node.parent.name == 'article'
	    node = node.parent
	    level += 1
	  end

	  level
	end

	# Downloads the sample page and prints the population standard deviation of
	# the <p> depths inside <article>, rounded to one decimal place.
	s = 'http://apply.embed.ly/static/data/2.html'
	# Kernel#open stopped opening URLs in Ruby 3.0; open-uri provides URI.open.
	doc = Nokogiri::HTML(URI.open(s))

	# Only <p> tags inside an <article> are measured, as the task requires; a <p>
	# without an article ancestor would also make depth fail.
	res = doc.xpath('//article//p').map { |el| depth(el) }
	#=> [1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 2, 1, 3, 1]

	# inject yields the running total first and the current element second.
	avg = res.inject(0) { |memo, n| memo + n }.to_f / res.size
	diffs = res.map { |n| (n - avg) * (n - avg) }

	# Square root of the mean squared deviation, rounded to the nearest tenth.
	p Math.sqrt(diffs.inject(0) { |memo, n| memo + n }.to_f / res.size).round(1)
	# Problem 3 of 3: Zipf's law
	# ==========================
	#
	# A simplified version of Zipf's law:
	# "For a given body of text, the most frequent word will occur approximately twice as often
	# as the second most frequent word, three times as often as the third most frequent word, etc.
	# [x, x/2, x/3, x/4, x/5, ...]"
	# The following is a frequency set of words in a body of text that follows Zipf's law:
	# [
	# ('the', 2520),
	# ('of', 1260),
	# ('and', 840),
	# ('a', 630),
	# ('to', 504)
	# ...
	# ]
	# Given that the text has 900 unique words, how many unique words, starting with the most frequently used word, make up half the text?
	#
	# Result: 22.

	# Total number of words in the text: the word at rank k (1-based) occurs
	# round(num / k) times, summed over all 900 unique words.
	count = 0
	num = 2520
	# Block parameters use plain pipes; the escaped `\|` form is not valid Ruby.
	frequencies = (1..900).map { |rank| (num.to_f / rank).round }
	frequencies.each { |frequency| count += frequency }

	half = (count / 2.0).round
	sum = 0
	res = 0

	# Take words from most to least frequent until they cover half the text.
	frequencies.each do |frequency|
	  break if sum >= half

	  sum += frequency
	  res += 1
	end

	p res