Last active
September 8, 2022 19:41
-
-
Save JoeGermuska/270c61aa548fa7535fb57ca0ff539344 to your computer and use it in GitHub Desktop.
SQL to fix importing Latin-1 text as if it were UTF-8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Sometimes one accidentally loads data that is in ISO8859-1 (aka "Latin-1") encoding having assumed that it was actually UTF-8 | |
-- so far it seems like à is a good flag although if your data might also have that correctly, this is less simple... | |
update tiger2020.census_name_lookup | |
set simple_name = replace(simple_name, 'ñ', 'ñ' ), | |
display_name = replace(display_name, 'ñ', 'ñ' ), | |
prefix_match_name = replace(prefix_match_name, 'ñ', 'ñ' ); | |
update tiger2020.census_name_lookup | |
set simple_name = replace(simple_name, 'ü', 'ü' ), | |
display_name = replace(display_name, 'ü', 'ü' ), | |
prefix_match_name = replace(prefix_match_name, 'ü', 'ü' ); | |
update tiger2020.census_name_lookup | |
set simple_name = replace(simple_name, 'ó', 'ó' ), | |
display_name = replace(display_name, 'ó', 'ó' ), | |
prefix_match_name = replace(prefix_match_name, 'ó', 'ó' ); | |
update tiger2020.census_name_lookup | |
set simple_name = replace(simple_name, 'á', 'á' ), | |
display_name = replace(display_name, 'á', 'á' ), | |
prefix_match_name = replace(prefix_match_name, 'á', 'á' ); | |
update tiger2020.census_name_lookup | |
set simple_name = replace(simple_name, 'é', 'é' ), | |
display_name = replace(display_name, 'é', 'é' ), | |
prefix_match_name = replace(prefix_match_name, 'é', 'é' ); | |
update tiger2020.census_name_lookup | |
set simple_name = replace(simple_name, 'Ã', 'í' ), -- note invisible second character | |
display_name = replace(display_name, 'Ã', 'í' ), -- '\xad' in string to be replaced... | |
prefix_match_name = replace(prefix_match_name, 'Ã', 'í' ); | |
update tiger2020.census_name_lookup | |
set simple_name = replace(simple_name, 'Ã¥', 'å' ), -- This only appears in Hagåtña CDP, Guam which isn't in CR | |
display_name = replace(display_name, 'Ã¥', 'å' ), -- but it may be useful in other cases. | |
prefix_match_name = replace(prefix_match_name, 'Ã¥', 'å' ); | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment