Created
June 12, 2011 08:03
-
-
Save wrunk/1021336 to your computer and use it in GitHub Desktop.
A base for coverting a mysql5 dump to sqlite
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# This library is free software; you can redistribute it and/or | |
# modify it under the terms of the GNU Lesser General Public | |
# License as published by the Free Software Foundation; either | |
# version 2.1 of the License, or (at your option) any later version. | |
# This library is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
# Lesser General Public License for more details. | |
# You should have received a copy of the GNU Lesser General Public | |
# License along with this library; if not, write to the Free Software | |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
# | |
# **WARNING** This is a very specific script to convert from a mysql dump to | |
# an importable sqlite3 file. | |
# | |
# In my case I was converting the data of a django project from | |
# Mysql 5X to sqlite3 | |
# | |
# Expected dump format: | |
# mysqldump --skip-extended-insert --compact -u ** -p** database_name > mysql_dump.db | |
# | |
# You would then run this script like: | |
# python mysql5_to_sqlite3.py mysql_dump.db "/tmp/some_sqlite3_file.sql" | |
# **** You need to then use Django to create your sqlite db skeleton. | |
# If you use syncdb MAKE SURE NOT to create an admin user when prompted. | |
# python manage.py syncdb (assuming you have some db file configured) | |
# Load the sql into a new sqlite3 Database file | |
# sqlite3 django_synceddb.db < /tmp/some_sqlite3_file.sql | |
# | |
# The import will likely complain about some dup keys, which is expected due to | |
# the syncdb. The dups are standard install records, and are harmless. | |
# | |
import re | |
import sys | |
# ---------------------------------------------------------------------------- # | |
# Custom line processing functions | |
# Needs to take in a line and return a line | |
# ---------------------------------------------------------------------------- # | |
def remove_last_column(line): | |
''' Very specific function to remove last value from an insert line. | |
INSERT INTO "test" VALUES (1,1,1,'test','not_found','page'); | |
In the above row, this function would remove the ",'page'" | |
''' | |
if 'test_table' in line: | |
line = re.sub(r",'[^']*'\);",');', line) | |
print line.strip() | |
return line | |
class Convert(object): | |
''' Convert from mysqldump to sqlite3 | |
Only imports the data (inserts). ''' | |
subs = ( | |
# Compiled match.....replacement | |
# | |
# Sqlite escapes the single quote inside a block with two single quotes | |
(re.compile(r"\\'"), "''"), | |
# Mysql escaped all my double quotes unnecessarily | |
(re.compile(r'\\"'), '"'), | |
# Make the backtick a double quote | |
(re.compile(r'`'), '"'), | |
# Line breaks need to become real HTTP compliant line breaks. | |
# Mysql will escape them to actually look like \r\n | |
(re.compile(r'\\r\\n'), '\r\n'), | |
(re.compile(r'\\n'), '\r\n'), | |
# Kill escaped backslashes | |
(re.compile(r'\\\\'), r'\\'), | |
) | |
def __init__(self, | |
in_file_path=None, | |
out_file_path=None, | |
adjustment_funcs=None): | |
self.in_file_path = in_file_path | |
self.out_file_path = out_file_path | |
self.lines = [] | |
self.adjustment_funcs = adjustment_funcs | |
if self.adjustment_funcs is None: | |
self.adjustment_funcs = [] | |
self.load_dump() | |
def load_dump(self): | |
''' Load the mysqldump into the line list in memory. | |
This script only looks at insert lines. | |
''' | |
fp = open(self.in_file_path) | |
for line in fp.readlines(): | |
if line.lower().startswith('insert'): | |
self._check_line(line) | |
fp.close() | |
def write_file(self, file_path=None): | |
if file_path is None: | |
if self.out_file_path is None: | |
raise ValueError('file_path to write_file is invalid') | |
file_path = self.out_file_path | |
fp = open(file_path, 'w') | |
fp.writelines(self.lines) | |
fp.close() | |
def _check_line(self, line): | |
''' Massage a line into shape and perform any external needed | |
adjustments ''' | |
for match, replacement in self.subs: | |
line = match.sub(replacement, line) | |
for func in self.adjustment_funcs: | |
line = func(line) | |
# Our line processors may return None to indicate the line is suppose | |
# to be ignored. | |
if line is not None: | |
self.lines.append(line) | |
if __name__ == '__main__': | |
# Maybe allow the file name to be passed in | |
convert = Convert( | |
in_file_path=sys.argv[1], | |
out_file_path=sys.argv[2], | |
adjustment_funcs=[remove_last_column], | |
) | |
convert.write_file() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment