Skip to content

Instantly share code, notes, and snippets.

@wrunk
Created June 12, 2011 08:03
Show Gist options
  • Save wrunk/1021336 to your computer and use it in GitHub Desktop.
Save wrunk/1021336 to your computer and use it in GitHub Desktop.
A base for coverting a mysql5 dump to sqlite
#!/usr/bin/env python
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# **WARNING** This is a very specific script to convert from a mysql dump to
# an importable sqlite3 file.
#
# In my case I was converting the data of a django project from
# Mysql 5X to sqlite3
#
# Expected dump format:
# mysqldump --skip-extended-insert --compact -u ** -p** database_name > mysql_dump.db
#
# You would then run this script like:
# python mysql5_to_sqlite3.py mysql_dump.db "/tmp/some_sqlite3_file.sql"
# **** You need to then use Django to create your sqlite db skeleton.
# If you use syncdb MAKE SURE NOT to create an admin user when prompted.
# python manage.py syncdb (assuming you have some db file configured)
# Load the sql into a new sqlite3 Database file
# sqlite3 django_synceddb.db < /tmp/some_sqlite3_file.sql
#
# The import will likely complain about some dup keys, which is expected due to
# the syncdb. The dups are standard install records, and are harmless.
#
import re
import sys
# ---------------------------------------------------------------------------- #
# Custom line processing functions
# Needs to take in a line and return a line
# ---------------------------------------------------------------------------- #
def remove_last_column(line):
''' Very specific function to remove last value from an insert line.
INSERT INTO "test" VALUES (1,1,1,'test','not_found','page');
In the above row, this function would remove the ",'page'"
'''
if 'test_table' in line:
line = re.sub(r",'[^']*'\);",');', line)
print line.strip()
return line
class Convert(object):
''' Convert from mysqldump to sqlite3
Only imports the data (inserts). '''
subs = (
# Compiled match.....replacement
#
# Sqlite escapes the single quote inside a block with two single quotes
(re.compile(r"\\'"), "''"),
# Mysql escaped all my double quotes unnecessarily
(re.compile(r'\\"'), '"'),
# Make the backtick a double quote
(re.compile(r'`'), '"'),
# Line breaks need to become real HTTP compliant line breaks.
# Mysql will escape them to actually look like \r\n
(re.compile(r'\\r\\n'), '\r\n'),
(re.compile(r'\\n'), '\r\n'),
# Kill escaped backslashes
(re.compile(r'\\\\'), r'\\'),
)
def __init__(self,
in_file_path=None,
out_file_path=None,
adjustment_funcs=None):
self.in_file_path = in_file_path
self.out_file_path = out_file_path
self.lines = []
self.adjustment_funcs = adjustment_funcs
if self.adjustment_funcs is None:
self.adjustment_funcs = []
self.load_dump()
def load_dump(self):
''' Load the mysqldump into the line list in memory.
This script only looks at insert lines.
'''
fp = open(self.in_file_path)
for line in fp.readlines():
if line.lower().startswith('insert'):
self._check_line(line)
fp.close()
def write_file(self, file_path=None):
if file_path is None:
if self.out_file_path is None:
raise ValueError('file_path to write_file is invalid')
file_path = self.out_file_path
fp = open(file_path, 'w')
fp.writelines(self.lines)
fp.close()
def _check_line(self, line):
''' Massage a line into shape and perform any external needed
adjustments '''
for match, replacement in self.subs:
line = match.sub(replacement, line)
for func in self.adjustment_funcs:
line = func(line)
# Our line processors may return None to indicate the line is suppose
# to be ignored.
if line is not None:
self.lines.append(line)
if __name__ == '__main__':
# Maybe allow the file name to be passed in
convert = Convert(
in_file_path=sys.argv[1],
out_file_path=sys.argv[2],
adjustment_funcs=[remove_last_column],
)
convert.write_file()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment