Skip to content

Instantly share code, notes, and snippets.

@JoeGermuska
Last active September 8, 2022 19:41
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JoeGermuska/270c61aa548fa7535fb57ca0ff539344 to your computer and use it in GitHub Desktop.
Save JoeGermuska/270c61aa548fa7535fb57ca0ff539344 to your computer and use it in GitHub Desktop.
SQL to fix importing Latin-1 text as if it were UTF-8
-- Sometimes one accidentally loads data that is in ISO8859-1 (aka "Latin-1") encoding having assumed that it was actually UTF-8
-- so far it seems like à is a good flag although if your data might also have that correctly, this is less simple...
update tiger2020.census_name_lookup
set simple_name = replace(simple_name, 'ñ', 'ñ' ),
display_name = replace(display_name, 'ñ', 'ñ' ),
prefix_match_name = replace(prefix_match_name, 'ñ', 'ñ' );
update tiger2020.census_name_lookup
set simple_name = replace(simple_name, 'ü', 'ü' ),
display_name = replace(display_name, 'ü', 'ü' ),
prefix_match_name = replace(prefix_match_name, 'ü', 'ü' );
update tiger2020.census_name_lookup
set simple_name = replace(simple_name, 'ó', 'ó' ),
display_name = replace(display_name, 'ó', 'ó' ),
prefix_match_name = replace(prefix_match_name, 'ó', 'ó' );
update tiger2020.census_name_lookup
set simple_name = replace(simple_name, 'á', 'á' ),
display_name = replace(display_name, 'á', 'á' ),
prefix_match_name = replace(prefix_match_name, 'á', 'á' );
update tiger2020.census_name_lookup
set simple_name = replace(simple_name, 'é', 'é' ),
display_name = replace(display_name, 'é', 'é' ),
prefix_match_name = replace(prefix_match_name, 'é', 'é' );
update tiger2020.census_name_lookup
set simple_name = replace(simple_name, 'í', 'í' ), -- note invisible second character
display_name = replace(display_name, 'í', 'í' ), -- '\xad' in string to be replaced...
prefix_match_name = replace(prefix_match_name, 'í', 'í' );
update tiger2020.census_name_lookup
set simple_name = replace(simple_name, 'Ã¥', 'å' ), -- This only appears in Hagåtña CDP, Guam which isn't in CR
display_name = replace(display_name, 'Ã¥', 'å' ), -- but it may be useful in other cases.
prefix_match_name = replace(prefix_match_name, 'Ã¥', 'å' );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment