briandfoy/perlybot_test

## perlybot_test
#!/Users/brian/bin/perls/perl5.22.0

use utf8;
use Encode qw(encode decode);
use v5.10;
use open qw(:std :utf8);
use Data::Dumper;
use HTTP::Tiny;
use Devel::Peek;

=pod

Remember:

Octets are a representation of data.

A Perl string is an abstract representation where we don't think about storage.

Many people think the verbs go the opposite way from the way they really do.

	Encoding goes to octets, which is the physical storage
		encode turns a Perl string into octets

	Decoding goes from the physical storage to character strings
		decode turns octets into a Perl string

Perl mostly handles the translation to and from physical storage,
and many of the modules we use are going to handle the conversion.
The cases where they don't are most likely ones where they guess at the
encoding and get it wrong:

	The headers lie, because the creator saved the document incorrectly

	The meta headers have the same problem

=cut


# Strictures for everything below; every variable in this script is lexical.
use strict;
use warnings;

# Because "use utf8" is in effect, Perl has already decoded this literal from
# the UTF-8 source text into a character string. It is a Perl string, not a
# string of UTF-8 octets.
my $content       = 'Nóirín Plunkett';

# I like to use "octets" in the var name to remind me which way
# I'm going.
my $latin1_octets = encode("iso-8859-1", $content);

# Every case below prints through the :utf8 layer that "use open" puts on
# STDOUT. Character strings therefore print correctly, while strings that
# already hold UTF-8 octets get encoded a second time on the way out.

{
	# A character string handed straight to the output layer.
	say "\n-----Start with UTF-8; Do nothing";
	my $string = $content;
	say $string;
}

{
	# Encoding by hand and then printing through :utf8 double-encodes.
	say "\n-----Start with UTF-8; Encode only";
	my $utf8_octets = encode('UTF-8', $content);
	say $utf8_octets;
}

{
	# Decoding something that is already a character string damages it (the
	# recorded output shows replacement characters); encoding the result
	# afterwards only adds double encoding on top.
	say "\n-----Start with UTF-8; Decode, then encode";
	my $string      = decode('UTF-8', $content);
	my $utf8_octets = encode('UTF-8', $string);
	say $utf8_octets;
}

{
	# decode() expects octets; $content is already characters, so the
	# recorded output shows replacement characters where the accents were.
	say "\n-----Start with UTF-8; Decode only";
	my $string = decode('UTF-8', $content);
	say $string;
}

{
	# NOTE: same case as the previous block. It is kept so the script's
	# output still matches the listing recorded after __END__.
	say "\n-----Start with UTF-8; Decode only";
	my $string = decode('UTF-8', $content);
	say $string;
}

{
	# Undecoded Latin-1 octets; the recorded output shows them printing as
	# the intended text.
	say "\n-----Start with Latin1; Do nothing";
	my $octets = $latin1_octets;
	say $octets;
}

{
	# Decode the Latin-1 octets (not $content) so this case really starts
	# from Latin-1, as its label says.
	say "\n-----Start with Latin1; Decode only";
	my $string = decode("iso-8859-1", $latin1_octets);
	say $string;
}

{
	# Correctly decoded, then encoded by hand: the :utf8 layer encodes the
	# octets again, so the output is double-encoded.
	say "\n-----Start with Latin1; Decode, then encode as UTF-8";
	my $string      = decode("iso-8859-1", $latin1_octets);
	my $utf8_octets = encode('UTF-8', $string);
	say $utf8_octets;
}
__END__


-----Start with UTF-8; Do nothing
Nóirín Plunkett

-----Start with UTF-8; Encode only
NÃ³irÃn Plunkett

-----Start with UTF-8; Decode, then encode
Nï¿½irï¿½n Plunkett

-----Start with UTF-8; Decode only
N�ir�n Plunkett

-----Start with UTF-8; Decode only
N�ir�n Plunkett

-----Start with Latin1; Do nothing
Nóirín Plunkett

-----Start with Latin1; Decode only
Nóirín Plunkett

-----Start with Latin1; Decode, then encode as UTF-8
NÃ³irÃn Plunkett
	#!/Users/brian/bin/perls/perl5.22.0

	use utf8;
	use Encode qw(encode decode);
	use v5.10;
	use open qw(:std :utf8);
	use Data::Dumper;
	use HTTP::Tiny;
	use Devel::Peek;

	=pod

	Remember:

	Octets are a representation of data.

	A Perl string is an abstract representation where we don't think about storage.

	Many people think the verbs go the opposite way from the way they really do.

	Encoding goes to octets, which is the physical storage
	encode turns a Perl string into octets

	Decoding goes from the physical storage to character strings
	decode turns octets into a Perl string

	Perl mostly handles the translation to and from physical storage,
	and many of the modules we use are going to handle the conversion.
	The cases where they don't are most likely ones where they guess at the
	encoding and get it wrong:

	The headers lie, because the creator saved the document incorrectly

	The meta headers have the same problem

	=cut


	# Strictures for everything below; every variable in this script is lexical.
	use strict;
	use warnings;

	# Because "use utf8" is in effect, Perl has already decoded this literal from
	# the UTF-8 source text into a character string. It is a Perl string, not a
	# string of UTF-8 octets.
	my $content = 'Nóirín Plunkett';

	# I like to use "octets" in the var name to remind me which way
	# I'm going.
	my $latin1_octets = encode("iso-8859-1", $content);

	# Every case below prints through the :utf8 layer that "use open" puts on
	# STDOUT. Character strings therefore print correctly, while strings that
	# already hold UTF-8 octets get encoded a second time on the way out.

	{
		# A character string handed straight to the output layer.
		say "\n-----Start with UTF-8; Do nothing";
		my $string = $content;
		say $string;
	}

	{
		# Encoding by hand and then printing through :utf8 double-encodes.
		say "\n-----Start with UTF-8; Encode only";
		my $utf8_octets = encode('UTF-8', $content);
		say $utf8_octets;
	}

	{
		# Decoding something that is already a character string damages it (the
		# recorded output shows replacement characters); encoding the result
		# afterwards only adds double encoding on top.
		say "\n-----Start with UTF-8; Decode, then encode";
		my $string      = decode('UTF-8', $content);
		my $utf8_octets = encode('UTF-8', $string);
		say $utf8_octets;
	}

	{
		# decode() expects octets; $content is already characters, so the
		# recorded output shows replacement characters where the accents were.
		say "\n-----Start with UTF-8; Decode only";
		my $string = decode('UTF-8', $content);
		say $string;
	}

	{
		# NOTE: same case as the previous block. It is kept so the script's
		# output still matches the listing recorded after __END__.
		say "\n-----Start with UTF-8; Decode only";
		my $string = decode('UTF-8', $content);
		say $string;
	}

	{
		# Undecoded Latin-1 octets; the recorded output shows them printing as
		# the intended text.
		say "\n-----Start with Latin1; Do nothing";
		my $octets = $latin1_octets;
		say $octets;
	}

	{
		# Decode the Latin-1 octets (not $content) so this case really starts
		# from Latin-1, as its label says.
		say "\n-----Start with Latin1; Decode only";
		my $string = decode("iso-8859-1", $latin1_octets);
		say $string;
	}

	{
		# Correctly decoded, then encoded by hand: the :utf8 layer encodes the
		# octets again, so the output is double-encoded.
		say "\n-----Start with Latin1; Decode, then encode as UTF-8";
		my $string      = decode("iso-8859-1", $latin1_octets);
		my $utf8_octets = encode('UTF-8', $string);
		say $utf8_octets;
	}
	__END__


	-----Start with UTF-8; Do nothing
	Nóirín Plunkett

	-----Start with UTF-8; Encode only
	NÃ³irÃn Plunkett

	-----Start with UTF-8; Decode, then encode
	Nï¿½irï¿½n Plunkett

	-----Start with UTF-8; Decode only
	N�ir�n Plunkett

	-----Start with UTF-8; Decode only
	N�ir�n Plunkett

	-----Start with Latin1; Do nothing
	Nóirín Plunkett

	-----Start with Latin1; Decode only
	Nóirín Plunkett

	-----Start with Latin1; Decode, then encode as UTF-8
	NÃ³irÃn Plunkett