Skip to content

Instantly share code, notes, and snippets.

@koshigoe
Last active November 15, 2018 03:40
Show Gist options
  • Save koshigoe/d8aca6d53d6ef96ed8497027f1f8f2b2 to your computer and use it in GitHub Desktop.
Save koshigoe/d8aca6d53d6ef96ed8497027f1f8f2b2 to your computer and use it in GitHub Desktop.
pg gem: invalid byte sequence in UTF-8
require 'pg'
# Reproduction script for the pg gem: reads the same one-row table through
# several COPY ... TO STDOUT decoders and through a plain SELECT, and prints
# how each resulting string is encoded. Expected output is recorded after
# __END__.
#
# Start a throwaway server first:
# $ docker run --rm -p 5432:5432 -e POSTGRES_PASSWORD=password -d postgres:9.6

# Prints one tab-separated line: label, the string, its first character and
# the encoding Ruby has attached to the string. Every numbered case uses this
# same column order (the hand-written "(6)" line previously had the two
# inspect columns swapped).
#
# @param label [String] case marker such as "(1)"
# @param str [String] value read back from the database
# @return [nil]
def report(label, str)
  puts [label, str.inspect, str[0].inspect, str.encoding].join("\t")
end

# Runs a COPY ... TO STDOUT statement and yields whatever get_copy_data
# returns for each row, until it returns a falsy value.
#
# @param conn [PG::Connection] open connection
# @param sql [String] COPY statement to execute
# @param decoder [Object, nil] optional copy decoder passed to copy_data;
#   nil is copy_data's own default, so omitting it matches a one-argument call
# @yieldparam row [Array, String] decoded row, or the raw line without a decoder
def each_copy_row(conn, sql, decoder = nil)
  conn.copy_data(sql, decoder) do
    while (row = conn.get_copy_data)
      yield row
    end
  end
end

conn = PG.connect(host: 'localhost', port: 5432, user: 'postgres', password: 'password', dbname: 'postgres')

begin
  # Fixture: a single row holding a two-byte UTF-8 character.
  conn.exec <<~SQL
    drop table if exists test_copy_to;
    create table test_copy_to (str varchar);
    insert into test_copy_to (str) values ('Ä');
  SQL

  copy_sql = 'COPY (select * from test_copy_to) TO STDOUT'

  # (1) CopyRow decoder with no explicit type map.
  each_copy_row(conn, copy_sql, PG::TextDecoder::CopyRow.new) do |row|
    report('(1)', row[0])
  end

  # (2) CopyRow decoder with the column explicitly decoded as String.
  string_map = PG::TypeMapByColumn.new([PG::TextDecoder::String.new])
  each_copy_row(conn, copy_sql, PG::TextDecoder::CopyRow.new(type_map: string_map)) do |row|
    report('(2)', row[0])
  end

  # (3)/(4) Column decoded as Bytea, then re-tagged as UTF-8 by hand.
  # force_encoding relabels the same bytes in place; it does not transcode.
  bytea_map = PG::TypeMapByColumn.new([PG::TextDecoder::Bytea.new])
  each_copy_row(conn, copy_sql, PG::TextDecoder::CopyRow.new(type_map: bytea_map)) do |row|
    report('(3)', row[0])
    report('(4)', row[0].force_encoding('utf-8'))
  end

  # (5)/(6) No decoder: each row arrives as a raw CSV line, so the trailing
  # newline is stripped before inspecting it.
  each_copy_row(conn, 'COPY (select * from test_copy_to) TO STDOUT CSV') do |line|
    line = line.chomp
    report('(5)', line)
    report('(6)', line.force_encoding('utf-8'))
  end

  # (7) Baseline: the same value fetched with an ordinary SELECT.
  report('(7)', conn.exec('select * from test_copy_to').getvalue(0, 0))
ensure
  # Release the server connection even if one of the steps above raised.
  conn.finish
end
__END__
ruby 2.5.3p105 (2018-10-18 revision 65156) [x86_64-darwin18]
(1) "Ä" "\xC3" UTF-8
(2) "Ä" "\xC3" UTF-8
(3) "\xC3\x84" "\xC3" ASCII-8BIT
(4) "Ä" "Ä" UTF-8
(5) "\xC3\x84" "\xC3" ASCII-8BIT
(6) "Ä" "Ä" UTF-8
(7) "Ä" "Ä" UTF-8
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment