public
Created

  • Download Gist
gistfile1.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
#encoding: utf-8
require 'mechanize'
 
# Mechanize post-connect hook that transcodes a response body from the charset
# declared in its Content-Type header into Ruby's internal encoding
# (Encoding.default_internal, or UTF-8 when that is unset).
#
# The hook also rewrites the charset label in the Content-Type header and the
# first occurrence of that label inside the body (typically a <meta> tag), so
# later parsing does not re-interpret the already converted text.
#
# NOTE(review): assumes Mechanize invokes hooks with a single params Hash that
# carries :response and :response_body - confirm against the Mechanize version
# in use.
class MechanizeEncodingHook
  # Matches the charset parameter of a Content-Type value. The value may be
  # quoted and may be followed by further parameters, so the capture stops at
  # whitespace, ';' or a quote instead of running to the end of the header.
  CHARSET_PATTERN = /charset=["']?(?<charset>[^\s;"']+)["']?/i

  # Converts params[:response_body] to the internal encoding.
  #
  # The response and body are left untouched when there is no response, no
  # body, no Content-Type header, no charset parameter, or when the declared
  # charset is not an encoding Ruby knows.
  #
  # @param params [Hash] hook arguments; reads :response and :response_body and
  #   writes the converted body back to :response_body. The response only needs
  #   to support [] and []= with the 'Content-Type' key.
  # @return [String, nil] the converted body, or nil when nothing was converted
  def call(params)
    response = params[:response]
    body = params[:response_body]
    return if response.nil? || body.nil?

    content_type = response['Content-Type']
    return if content_type.nil?

    charset = content_type[CHARSET_PATTERN, 'charset']
    return if charset.nil?

    source_encoding = lookup_encoding(charset)
    return if source_encoding.nil?

    internal_encoding = (Encoding.default_internal || 'utf-8').to_s.downcase

    # Work on a copy: force_encoding mutates its receiver, and the incoming
    # body may be frozen or shared with the caller.
    converted = body.dup.force_encoding(source_encoding)
    # Bytes that are invalid or undefined in the source charset are replaced
    # rather than aborting the whole request.
    converted = converted.encode(internal_encoding, invalid: :replace, undef: :replace)

    # Rewrite the in-document charset declaration only when the label actually
    # changes; String#sub is a no-op when the label is absent from the body.
    unless charset.casecmp(internal_encoding).zero?
      converted = converted.sub(/#{Regexp.escape(charset)}/i) { internal_encoding }
    end

    # The header is updated only after the conversion succeeded, and only the
    # charset parameter is replaced so other parameters survive.
    response['Content-Type'] =
      content_type.sub(CHARSET_PATTERN) { "charset=#{internal_encoding}" }
    params[:response_body] = converted
  end

  private

  # Resolves a charset label to an Encoding, or nil when Ruby does not know it.
  def lookup_encoding(name)
    Encoding.find(name)
  rescue ArgumentError
    nil
  end
end
 
require 'uri'

# Base search URL; the search term is appended as the kp_query value.
KINOPOISK_SEARCH_URL = "http://kinopoisk.ru/index.php?kp_query="
query = 'терминатор'

agent = Mechanize.new
# Transcode every response into the internal encoding before it is parsed.
agent.post_connect_hooks << MechanizeEncodingHook.new

# The term is sent as windows-1251 bytes (presumably what the site expects -
# TODO confirm) and percent-encoded so the URL contains only ASCII and stays
# valid for terms with spaces or reserved characters.
escaped_query = URI.encode_www_form_component(query.encode("windows-1251"))
agent.get "#{KINOPOISK_SEARCH_URL}#{escaped_query}"

# Print the text of the ".all" element of each matching cell; cells without
# such an element are skipped instead of raising NoMethodError on nil.
agent.page.search("td.news[width]").each do |section|
  link = section.at(".all")
  puts link.content if link
end

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.