Skip to content

Instantly share code, notes, and snippets.

@tpokorra
Created April 3, 2021 04:26
Show Gist options
  • Save tpokorra/6e077a34a951516b12509c2f4ce7d514 to your computer and use it in GitHub Desktop.
Save tpokorra/6e077a34a951516b12509c2f4ce7d514 to your computer and use it in GitHub Desktop.
fix mediawiki mysql database, replace Umlaut, when converting from latin1 to utf-8
#!/usr/bin/python3
# first call:
# mysqldump --add-drop-table database_to_correct | replace CHARSET=latin1 CHARSET=utf8 | iconv -f latin1 -t utf8 | mysql database_to_correct --default_character_set utf8 > fixed.sql
# then run this script
# Ordered (search, replacement) pairs. They are applied top to bottom to every
# line, so the order is kept exactly as in the original script.
#
# NOTE(review): the '<80><93>' / '<80><99>' patterns are kept literally as they
# appear in this file; they look like a pager's rendering of the raw bytes
# 0x80 0x93 / 0x80 0x99 -- confirm whether chr(0x80) + chr(0x93) was intended.
# NOTE(review): the pairs whose search text equals the replacement ('ü', 'ä',
# 'ö') are no-ops as written; presumably the search side was once a mojibake
# sequence -- confirm against the original dump before removing them.
_REPLACEMENTS = (
    ('CHARACTER SET latin1 COLLATE latin1_bin',
     'CHARACTER SET utf8 COLLATE utf8_bin'),
    ('â<80><93>', '-'),
    ('â<80><99>', "'"),
    ('Ã' + chr(0x83) + '¼', 'ü'),
    ('ü', 'ü'),
    ('ä', 'ä'),
    ('Ã' + chr(0x83) + '¶', 'ö'),
    ('ö', 'ö'),
    ('Ã' + chr(0x9c), 'Ü'),
    ('Ã' + chr(0x9f), 'ß'),
    ('Ã' + chr(0x84), 'Ä'),
    ('u8c3a4', 'ä'),
)


def fix_line(line):
    """Return *line* with every pair from ``_REPLACEMENTS`` applied in order."""
    for broken, fixed in _REPLACEMENTS:
        line = line.replace(broken, fixed)
    return line


def convert_file(src='fixed.sql', dst='converted.sql'):
    """Copy *src* to *dst* line by line, passing each line through fix_line.

    Both files are handled as UTF-8. The output encoding is given explicitly
    so that writing the umlauts does not depend on the locale's default
    encoding. The input is opened first, so a missing *src* raises before
    *dst* is created or truncated, and both files are closed even if an
    error occurs part-way through.
    """
    with open(src, encoding='utf8') as f, \
            open(dst, 'w', encoding='utf8') as out:
        for line in f:
            out.write(fix_line(line))


if __name__ == '__main__':
    convert_file()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment