# usahg/w3c_validator_with_mechanize_authentication.rb

## w3c_validator_with_mechanize_authentication.rb
# Dependencies:
#   gem install mechanize
#   ^ if this doesn't work and you are on Ubuntu, you'll have to install nokogiri first
#   (gem install __name__ is now by default equivalent to gem install __name__ -y; -y includes all dependencies)
#   gem install w3c_validators


# I had to write this in order to validate pages for HTML5 compliance for our in-house product.
# My first repo.


require "rubygems"
require "w3c_validators"
require 'mechanize'
include W3CValidators

# Validates an HTML fragment as HTML5 with the W3C markup validator, echoes
# the outcome to stdout and appends a report to the error log.
#
# text_frag - String of HTML markup to validate.
#
# Each validation error is printed on its own line ('Valid!' is printed when
# there are none). A timestamped entry listing the errors is then appended to
# the file 'errorlog_html1' in the current working directory.
#
# Returns the Array of error messages as Strings (empty when no errors were
# reported).
def validate(text_frag)
  validator = MarkupValidator.new
  # Change :html5 to e.g. :html32 to validate against a different version.
  validator.set_doctype!(:html5)

  results = validator.validate_text(text_frag)
  messages = results.errors.map { |err| err.to_s }

  if messages.empty?
    puts 'Valid!'
  else
    messages.each { |message| puts message }
  end

  # The original code derived the file name from a counter that was reset to 1
  # on every call, so the log always went to this one file (opened in append
  # mode). Strings are built with interpolation instead of String#concat,
  # which mutates the literal and breaks under frozen string literals.
  File.open('errorlog_html1', 'a') do |log|
    log.puts 'generating error log for url '
    log.puts "\nat #{Time.now}", ' '
    log.puts messages
    log.puts "\n"
  end

  messages
end


# Logs in through the first form on the login page, then downloads the HTML
# source of every given link with the same Mechanize agent (so the session
# established by the login is reused).
#
# links     - Array of URL Strings to fetch after logging in.
# login_url - URL String of the page that holds the login form.
# user_name - String assigned to the form's userName field.
# password  - String assigned to the form's pwd field.
#
# URLs without a scheme (e.g. 'www.example.com/page') get 'http://' prepended:
# Mechanize needs an absolute URL for the first request and would resolve a
# scheme-less link relative to the current page on later ones.
#
# Returns an Array of HTML source Strings: the page returned by submitting the
# login form first, followed by one entry per link, in the given order.
# Raises RuntimeError when the login page contains no form.
def get_list_of_html_fragments(links, login_url = 'www.twitter.com', user_name = ' ', password = ' ')
  # Leaves URLs that already carry a scheme (and URI objects) untouched.
  absolute = lambda do |url|
    url.to_s =~ %r{\A[a-z][a-z0-9+.-]*://}i ? url : "http://#{url}"
  end

  agent = Mechanize.new
  login_form = agent.get(absolute.call(login_url)).forms.first
  raise "no login form found at #{login_url}" if login_form.nil?

  login_form.userName = user_name
  login_form.pwd = password

  # The first collected source is the page shown right after logging in.
  bodies = [agent.submit(login_form).body]
  links.each { |link| bodies << agent.get(absolute.call(link)).body }
  bodies
end


# you can add your own implementation here, import links from a file etc


# Pages to validate after logging in. The URLs carry an explicit scheme
# because Mechanize needs absolute URLs; a scheme-less string is treated as a
# relative path.
links = [
  'http://www.twitter.com/railstroll',
  'http://www.twitter.com/someone_else'
]

# Collect the HTML source of the post-login page and of every link.
bodies = get_list_of_html_fragments(links)

# Validate each collected page; validate prints the errors and appends them
# to the error log. The closing message is printed whether or not errors
# were reported.
bodies.each do |frag|
  puts "validating this frag ", frag
  validate(frag)
  puts "\n\n\n successfully validated.....\n\n\n"
end


puts "done done done"
	# Dependencies:
	#   gem install mechanize
	#   ^ if this doesn't work and you are on Ubuntu, you'll have to install nokogiri first
	#   (gem install __name__ is now by default equivalent to gem install __name__ -y; -y includes all dependencies)
	#   gem install w3c_validators


	# I had to write this in order to validate pages for HTML5 compliance for our in-house product.
	# My first repo.


	require "rubygems"
	require "w3c_validators"
	require 'mechanize'
	include W3CValidators

	# Validates an HTML fragment as HTML5 with the W3C markup validator, echoes
	# the outcome to stdout and appends a report to the error log.
	#
	# text_frag - String of HTML markup to validate.
	#
	# Each validation error is printed on its own line ('Valid!' is printed when
	# there are none). A timestamped entry listing the errors is then appended to
	# the file 'errorlog_html1' in the current working directory.
	#
	# Returns the Array of error messages as Strings (empty when no errors were
	# reported).
	def validate(text_frag)
	  validator = MarkupValidator.new
	  # Change :html5 to e.g. :html32 to validate against a different version.
	  validator.set_doctype!(:html5)

	  results = validator.validate_text(text_frag)
	  messages = results.errors.map { |err| err.to_s }

	  if messages.empty?
	    puts 'Valid!'
	  else
	    messages.each { |message| puts message }
	  end

	  # The original code derived the file name from a counter that was reset to 1
	  # on every call, so the log always went to this one file (opened in append
	  # mode). Strings are built with interpolation instead of String#concat,
	  # which mutates the literal and breaks under frozen string literals.
	  File.open('errorlog_html1', 'a') do |log|
	    log.puts 'generating error log for url '
	    log.puts "\nat #{Time.now}", ' '
	    log.puts messages
	    log.puts "\n"
	  end

	  messages
	end


	# Logs in through the first form on the login page, then downloads the HTML
	# source of every given link with the same Mechanize agent (so the session
	# established by the login is reused).
	#
	# links     - Array of URL Strings to fetch after logging in.
	# login_url - URL String of the page that holds the login form.
	# user_name - String assigned to the form's userName field.
	# password  - String assigned to the form's pwd field.
	#
	# URLs without a scheme (e.g. 'www.example.com/page') get 'http://' prepended:
	# Mechanize needs an absolute URL for the first request and would resolve a
	# scheme-less link relative to the current page on later ones.
	#
	# Returns an Array of HTML source Strings: the page returned by submitting the
	# login form first, followed by one entry per link, in the given order.
	# Raises RuntimeError when the login page contains no form.
	def get_list_of_html_fragments(links, login_url = 'www.twitter.com', user_name = ' ', password = ' ')
	  # Leaves URLs that already carry a scheme (and URI objects) untouched.
	  absolute = lambda do |url|
	    url.to_s =~ %r{\A[a-z][a-z0-9+.-]*://}i ? url : "http://#{url}"
	  end

	  agent = Mechanize.new
	  login_form = agent.get(absolute.call(login_url)).forms.first
	  raise "no login form found at #{login_url}" if login_form.nil?

	  login_form.userName = user_name
	  login_form.pwd = password

	  # The first collected source is the page shown right after logging in.
	  bodies = [agent.submit(login_form).body]
	  links.each { |link| bodies << agent.get(absolute.call(link)).body }
	  bodies
	end



	# you can add your own implementation here, import links from a file etc


	# Pages to validate after logging in. The URLs carry an explicit scheme
	# because Mechanize needs absolute URLs; a scheme-less string is treated as a
	# relative path.
	links = [
	  'http://www.twitter.com/railstroll',
	  'http://www.twitter.com/someone_else'
	]

	# Collect the HTML source of the post-login page and of every link.
	bodies = get_list_of_html_fragments(links)

	# Validate each collected page; validate prints the errors and appends them
	# to the error log. The closing message is printed whether or not errors
	# were reported.
	bodies.each do |frag|
	  puts "validating this frag ", frag
	  validate(frag)
	  puts "\n\n\n successfully validated.....\n\n\n"
	end


	puts "done done done"