Skip to content

Instantly share code, notes, and snippets.

@charl
Created February 12, 2013 07:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save charl/4760815 to your computer and use it in GitHub Desktop.
Save charl/4760815 to your computer and use it in GitHub Desktop.
Encoding::UndefinedConversionError: "\xE3" from ASCII-8BIT to UTF-8
$ jruby ./test.rb
SUCCESS
=======================================================================
Native: {"created_at"=>"Mon Feb 11 16:33:45 +0000 2013", "id"=>301006116974915600, "id_str"=>"301006116974915586", "text"=>"やっとここまできた: ひとりでやるRiak Advent Calendar 2012 day14 - Haskell Client - kuenishi's blog - http://t.co/QXFzD0Sn", "source"=>"<a href=\"http://sites.google.com/site/yorufukurou/\" rel=\"nofollow\">YoruFukurou</a>", "truncated"=>false, "in_reply_to_status_id"=>nil, "in_reply_to_status_id_str"=>nil, "in_reply_to_user_id"=>nil, "in_reply_to_user_id_str"=>nil, "in_reply_to_screen_name"=>nil, "user"=>{"id"=>5576192, "id_str"=>"5576192", "name"=>"UENISHI Kota", "screen_name"=>"kuenishi", "location"=>"Tokyo, Japan", "description"=>"Just got senior: these tweets are my own; forever. WishList http://t.co/50iJIDzM", "url"=>"http://kuenishi.github.com/", "entities"=>{"url"=>{"urls"=>[{"url"=>"http://kuenishi.github.com/", "expanded_url"=>nil, "indices"=>[0, 27]}]}, "description"=>{"urls"=>[{"url"=>"http://t.co/50iJIDzM", "expanded_url"=>"http://www.amazon.co.jp/registry/wishlist/1P6IW44XCM1H2", "display_url"=>"amazon.co.jp/registry/wishl…", "indices"=>[61, 81]}]}}, "protected"=>false, "followers_count"=>1432, "friends_count"=>495, "listed_count"=>149, "created_at"=>"Sat Apr 28 04:09:38 +0000 2007", "favourites_count"=>766, "utc_offset"=>32400, "time_zone"=>"Tokyo", "geo_enabled"=>false, "verified"=>false, "statuses_count"=>52154, "lang"=>"en", "contributors_enabled"=>false, "is_translator"=>false, "profile_background_color"=>"022330", "profile_background_image_url"=>"http://a0.twimg.com/profile_background_images/673049678/e5d613f19df540861b6328670b894e74.jpeg", "profile_background_image_url_https"=>"https://si0.twimg.com/profile_background_images/673049678/e5d613f19df540861b6328670b894e74.jpeg", "profile_background_tile"=>false, "profile_image_url"=>"http://a0.twimg.com/profile_images/2031012144/549099_3529711883603_1296410328_33489801_1920286049_n_normal.jpeg", 
"profile_image_url_https"=>"https://si0.twimg.com/profile_images/2031012144/549099_3529711883603_1296410328_33489801_1920286049_n_normal.jpeg", "profile_banner_url"=>"https://si0.twimg.com/profile_banners/5576192/1348992450", "profile_link_color"=>"0084B4", "profile_sidebar_border_color"=>"FFFFFF", "profile_sidebar_fill_color"=>"C0DFEC", "profile_text_color"=>"333333", "profile_use_background_image"=>true, "default_profile"=>false, "default_profile_image"=>false, "following"=>true, "follow_request_sent"=>false, "notifications"=>nil}, "geo"=>nil, "coordinates"=>nil, "place"=>nil, "contributors"=>nil, "retweet_count"=>0, "entities"=>{"hashtags"=>[], "urls"=>[{"url"=>"http://t.co/QXFzD0Sn", "expanded_url"=>"http://bit.ly/XCxYrq", "display_url"=>"bit.ly/XCxYrq", "indices"=>[86, 106]}], "user_mentions"=>[]}, "favorited"=>false, "retweeted"=>false, "possibly_sensitive"=>false}
JSON: {"created_at":"Mon Feb 11 16:33:45 +0000 2013","id":301006116974915600,"id_str":"301006116974915586","text":"やっとここまできた: ひとりでやるRiak Advent Calendar 2012 day14 - Haskell Client - kuenishi's blog - http://t.co/QXFzD0Sn","source":"<a href=\"http://sites.google.com/site/yorufukurou/\" rel=\"nofollow\">YoruFukurou</a>","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":5576192,"id_str":"5576192","name":"UENISHI Kota","screen_name":"kuenishi","location":"Tokyo, Japan","description":"Just got senior: these tweets are my own; forever. WishList http://t.co/50iJIDzM","url":"http://kuenishi.github.com/","entities":{"url":{"urls":[{"url":"http://kuenishi.github.com/","expanded_url":null,"indices":[0,27]}]},"description":{"urls":[{"url":"http://t.co/50iJIDzM","expanded_url":"http://www.amazon.co.jp/registry/wishlist/1P6IW44XCM1H2","display_url":"amazon.co.jp/registry/wishl…","indices":[61,81]}]}},"protected":false,"followers_count":1432,"friends_count":495,"listed_count":149,"created_at":"Sat Apr 28 04:09:38 +0000 
2007","favourites_count":766,"utc_offset":32400,"time_zone":"Tokyo","geo_enabled":false,"verified":false,"statuses_count":52154,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"022330","profile_background_image_url":"http://a0.twimg.com/profile_background_images/673049678/e5d613f19df540861b6328670b894e74.jpeg","profile_background_image_url_https":"https://si0.twimg.com/profile_background_images/673049678/e5d613f19df540861b6328670b894e74.jpeg","profile_background_tile":false,"profile_image_url":"http://a0.twimg.com/profile_images/2031012144/549099_3529711883603_1296410328_33489801_1920286049_n_normal.jpeg","profile_image_url_https":"https://si0.twimg.com/profile_images/2031012144/549099_3529711883603_1296410328_33489801_1920286049_n_normal.jpeg","profile_banner_url":"https://si0.twimg.com/profile_banners/5576192/1348992450","profile_link_color":"0084B4","profile_sidebar_border_color":"FFFFFF","profile_sidebar_fill_color":"C0DFEC","profile_text_color":"333333","profile_use_background_image":true,"default_profile":false,"default_profile_image":false,"following":true,"follow_request_sent":false,"notifications":null},"geo":null,"coordinates":null,"place":null,"contributors":null,"retweet_count":0,"entities":{"hashtags":[],"urls":[{"url":"http://t.co/QXFzD0Sn","expanded_url":"http://bit.ly/XCxYrq","display_url":"bit.ly/XCxYrq","indices":[86,106]}],"user_mentions":[]},"favorited":false,"retweeted":false,"possibly_sensitive":false}
FAIL!
=======================================================================
Native: [{"id"=>"301006116974915600", "created_at"=>"Mon Feb 11 16:33:45 +0000 2013", "entities_urls_display_url"=>"bit.ly/XCxYrq", "entities_urls_expanded_url"=>"http://bit.ly/XCxYrq", "entities_urls_indices"=>"86 106", "entities_urls_url"=>"http://t.co/QXFzD0Sn", "favorited"=>"false", "id_str"=>"301006116974915586", "possibly_sensitive"=>"false", "retweet_count"=>"0", "retweeted"=>"false", "source"=>"<a href=\"http://sites.google.com/site/yorufukurou/\" rel=\"nofollow\">YoruFukurou</a>", "text"=>"\xE3\x82\x84\xE3\x81\xA3\xE3\x81\xA8\xE3\x81\x93\xE3\x81\x93\xE3\x81\xBE\xE3\x81\xA7\xE3\x81\x8D\xE3\x81\x9F: \xE3\x81\xB2\xE3\x81\xA8\xE3\x82\x8A\xE3\x81\xA7\xE3\x82\x84\xE3\x82\x8BRiak Advent Calendar 2012 day14 - Haskell Client - kuenishi's blog - http://t.co/QXFzD0Sn", "truncated"=>"false", "user_contributors_enabled"=>"false", "user_created_at"=>"Sat Apr 28 04:09:38 +0000 2007", "user_default_profile"=>"false", "user_default_profile_image"=>"false", "user_description"=>"Just got senior: these tweets are my own; forever. 
WishList http://t.co/50iJIDzM", "user_entities_description_urls_display_url"=>"amazon.co.jp/registry/wishl\xE2\x80\xA6", "user_entities_description_urls_expanded_url"=>"http://www.amazon.co.jp/registry/wishlist/1P6IW44XCM1H2", "user_entities_description_urls_indices"=>"61 81", "user_entities_description_urls_url"=>"http://t.co/50iJIDzM", "user_entities_url_urls_indices"=>"0 27", "user_entities_url_urls_url"=>"http://kuenishi.github.com/", "user_favourites_count"=>"766", "user_follow_request_sent"=>"false", "user_followers_count"=>"1432", "user_following"=>"true", "user_friends_count"=>"495", "user_geo_enabled"=>"false", "user_id"=>"5576192", "user_id_str"=>"5576192", "user_is_translator"=>"false", "user_lang"=>"en", "user_listed_count"=>"149", "user_location"=>"Tokyo, Japan", "user_name"=>"UENISHI Kota", "user_profile_background_color"=>"022330", "user_profile_background_image_url"=>"http://a0.twimg.com/profile_background_images/673049678/e5d613f19df540861b6328670b894e74.jpeg", "user_profile_background_image_url_https"=>"https://si0.twimg.com/profile_background_images/673049678/e5d613f19df540861b6328670b894e74.jpeg", "user_profile_background_tile"=>"false", "user_profile_banner_url"=>"https://si0.twimg.com/profile_banners/5576192/1348992450", "user_profile_image_url"=>"http://a0.twimg.com/profile_images/2031012144/549099_3529711883603_1296410328_33489801_1920286049_n_normal.jpeg", "user_profile_image_url_https"=>"https://si0.twimg.com/profile_images/2031012144/549099_3529711883603_1296410328_33489801_1920286049_n_normal.jpeg", "user_profile_link_color"=>"0084B4", "user_profile_sidebar_border_color"=>"FFFFFF", "user_profile_sidebar_fill_color"=>"C0DFEC", "user_profile_text_color"=>"333333", "user_profile_use_background_image"=>"true", "user_protected"=>"false", "user_screen_name"=>"kuenishi", "user_statuses_count"=>"52154", "user_time_zone"=>"Tokyo", "user_url"=>"http://kuenishi.github.com/", "user_utc_offset"=>"32400", "user_verified"=>"false"}]
Encoding::UndefinedConversionError: "\xE3" from ASCII-8BIT to UTF-8
encode at org/jruby/RubyString.java:7563
generate at json/ext/GeneratorState.java:210
generate at /home/charl/.rvm/gems/jruby-1.7.2@riak-encoding/gems/json-1.7.7-java/lib/json/common.rb:223
(root) at ./test.rb:39
require 'bundler/setup'
require "json"
Bundler.require
# Read raw.txt in binary mode tagged as UTF-8 and keep the second chunk of a
# blank-line split (presumably the body that follows a header section --
# confirm against the contents of raw.txt).
txt = File.read("raw.txt", :mode => "rb:UTF-8").split(/\r?\n\r?\n/)[1]
# Parse the captured text and store the result in the "tweets" bucket as a
# JSON object keyed by the tweet's id_str.
tweet = Riak::JSON.parse(txt)
tweet_key = tweet["id_str"]

bucket = "tweets"
c = Riak::Client.new(:protocol => 'pbc')

obj = Riak::RObject.new(c.bucket(bucket), tweet_key)
obj.content_type = 'application/json'
obj.data = tweet
obj.store

# Keys to look up again later through search.
data = [tweet_key]
# Fetching the doc directly by key through the client and bucket works: the
# text is handed back as UTF-8, so it can be turned into a JSON string.
puts "SUCCESS", "======================================================================="
direct_doc = c[bucket][tweet["id_str"]].data
puts "Native: " + direct_doc.inspect
#puts "Encoding: #{c[bucket][tweet["id_str"]].raw_data.encoding}"
puts "JSON: " + JSON.generate(direct_doc)
print "\n\n"
# Grabbing the doc via search causes the text to be presented as ASCII-8BIT
# instead of UTF-8, causing the JSON lib to barf
# (Encoding::UndefinedConversionError) when trying to convert the native hash
# to a JSON string.
puts "FAIL!"
puts "======================================================================="
begin
  # One "id_str:<key>" term per stored key, OR-ed together.
  query = data.map { |id| "id_str:#{id}" }.join(" OR ")
  reply = c.search(bucket, query, {:rows => 15000})["docs"]
  puts "Native: #{reply.inspect}"
  # This is the call that raises; the error is intentionally left to
  # propagate so the script still demonstrates the problem.
  puts "JSON: #{JSON.generate reply.first}"
ensure
  # Always remove the test object, even when the JSON conversion above
  # raises; otherwise every failing run leaves the tweet behind in the bucket.
  obj.delete
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment