-
-
Save vigneshwaranr/3454093 to your computer and use it in GitHub Desktop.
#! /bin/sh | |
usage_error () { | |
echo 'Usage: sh migrator.sh <path to sqlite_to_postgres.py> <path to sqlite db file> <an empty dir to output dump files>' | |
echo | |
echo 'Example:' | |
echo '>sh migrator.sh sqlite_to_postgres.py ~/reviewboard.db /tmp/dumps' | |
echo | |
echo 'Tested on:' | |
echo 'Python 2.7.3' | |
echo 'SQLite 3.7.9' | |
} | |
if [ ! $# -eq 3 ] | |
then | |
usage_error | |
exit 1 | |
fi | |
if [ ! -r $1 ] | |
then | |
echo $1' is not readable.' | |
echo 'Please give the correct path to sqlite_to_postgres.py' | |
exit 1 | |
fi | |
if [ ! -r $2 ] | |
then | |
echo $2' is not readable' | |
exit 1 | |
fi | |
if [ ! -d $3 ] | |
then | |
echo $3' is not a valid directory' | |
exit 1 | |
fi | |
#Get the list of tables | |
echo .tables | sqlite3 $2 > $3/lsoftbls | |
#Get dumps from sqlite | |
for i in `cat $3/lsoftbls` | |
do | |
echo 'Generating sqlite dumps for '$i | |
echo '.output '$3'/'$i'.dump' > $3/dumper | |
echo 'pragma table_info('$i');' >> $3/dumper | |
echo '.dump '$i >> $3/dumper | |
echo '.quit' >> $3/dumper | |
cat $3/dumper | sqlite3 $2 | |
done | |
#Use the python script to convert the sqlite dumps to psql dumps | |
echo | |
echo 'Now converting the sqlite dumps into psql format...' | |
echo | |
for i in `ls -1 $3/*.dump` | |
do | |
python $1 $i | |
done | |
#Remove the sqlite3 dumps and the file 'lsoftbls' | |
echo | |
echo 'Removing temporary files..' | |
rm $3/*.dump | |
rm $3/lsoftbls | |
rm $3/dumper | |
echo 'Removing empty dump files..' | |
wc -l $3/*.psql | grep -w 0 | awk '{ print $NF }' | xargs rm | |
echo ; echo 'Done.'; echo | |
echo 'Please find the psql dumps at '$3 |
0|display_name|varchar(64)|1||0 | |
1|name|varchar(64)|1||0 | |
2|local_site_id|integer|0||0 | |
3|incoming_request_count|integer|0||0 | |
4|invite_only|bool|1||0 | |
5|id|integer|1||1 | |
6|mailing_list|varchar(75)|1||0 | |
7|visible|bool|1||0 | |
PRAGMA foreign_keys=OFF; | |
BEGIN TRANSACTION; | |
CREATE TABLE "reviews_group"("display_name" varchar(64) NOT NULL, "name" varchar(64) NOT NULL, "local_site_id" integer NULL, "incoming_request_count" integer NULL, "invite_only" bool NOT NULL, "id" integer NOT NULL UNIQUE PRIMARY KEY, "mailing_list" varchar(75) NOT NULL, "visible" bool NOT NULL); | |
INSERT INTO "reviews_group" VALUES('Developers','developers',NULL,127,0,1,'',1); | |
INSERT INTO "reviews_group" VALUES('Testers','testers',NULL,2,0,2,'',1); | |
INSERT INTO "reviews_group" VALUES('QA','qa',NULL,1,0,3,'',1); | |
INSERT INTO "reviews_group" VALUES('Release Engineers','releng',NULL,7,0,4,'',1); | |
INSERT INTO "reviews_group" VALUES('Managers','mgrs',NULL,1,0,5,'',1); | |
COMMIT; |
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Developers','developers',NULL,127,FALSE,1,'',TRUE); | |
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Testers','testers',NULL,2,FALSE,2,'',TRUE); | |
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('QA','qa',NULL,1,FALSE,3,'',TRUE); | |
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Release Engineers','releng',NULL,7,FALSE,4,'',TRUE); | |
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Managers','mgrs',NULL,1,FALSE,5,'',TRUE); |
#! /usr/bin/python | |
# SQLite3 uses 1 and 0 whereas PostgreSQL uses TRUE and FALSE for booleans | |
# This python script serves a single purpose of converting the sqlite dumps | |
# into postres-compatible dumps by converting the boolean values. | |
import random | |
import sys | |
import os.path | |
BOUNDARY = '%$#@!~R@ND0M^&*()_B0UND@RY<>?:'+str(int(random.random()*(10**10))) | |
COLUMNS = [] | |
COLUMN_NAMES = "" | |
COLUMN_TYPES = () | |
def usage(): | |
''' | |
Print usage and exit | |
''' | |
print "Usage: ./bool_changer.py <filename.dump>" | |
sys.exit() | |
def fix_column_names(first_line): | |
''' | |
The insert statement from sqlite3 dump is as follows: | |
INSERT INTO "test" VALUES(1,'Hello'); | |
We need to add the column information to the statements like this: | |
INSERT INTO "test" (id,name) VALUES(1,'Wibble'); | |
This is necessary because the column orders may be different in psql db. | |
''' | |
global COLUMN_NAMES | |
index = first_line.index(' VALUES') | |
return first_line[:index] + COLUMN_NAMES + first_line[index:] | |
def fix_bool(stmt): | |
from_here = 'VALUES(' | |
start_pos = stmt.index(from_here) + len(from_here) | |
cur_pos = start_pos | |
newstmt = stmt[:start_pos] # [INSERT ... VALUES(] | |
stmtlen = len(stmt) | |
no_of_cols = len(COLUMN_TYPES) | |
for i in range(0,no_of_cols): | |
if COLUMN_TYPES[i] == 'bool': | |
newstmt += stmt[start_pos:cur_pos] #nothing happens if both are same | |
if stmt[cur_pos] == '1': newstmt += 'TRUE' | |
elif stmt[cur_pos] == '0': newstmt += 'FALSE' | |
if i == no_of_cols-1: #i.e. last column | |
newstmt += ');\n' | |
break | |
newstmt += ',' #not last column | |
cur_pos += 2 | |
start_pos = cur_pos | |
else: | |
if i == no_of_cols-1: #if it's the last non-bool column, then | |
newstmt += stmt[start_pos:] #simply insert everything that's left | |
break #and leave | |
if stmt[cur_pos] != "'": | |
for cur_pos in range(cur_pos+1,stmtlen): | |
if stmt[cur_pos] == ',': | |
cur_pos += 1 | |
break #the inner loop and go to next column | |
else: # the 'problematic' place. cur_pos in "'" | |
cur_pos += 1 #what's next after "'"? | |
while cur_pos < stmtlen: | |
if stmt[cur_pos] == "'": | |
if stmt[cur_pos+1] == "'": #ignore escaped quote ('') | |
cur_pos += 2 | |
continue #searching | |
elif stmt[cur_pos+1] == ",": #end of string | |
cur_pos += 2 | |
break #to next column | |
cur_pos += 1 | |
return newstmt | |
def get_psql_inserts(insert_lines): | |
''' | |
This method will get a list of one or more lines that together constitute | |
a single insert statement from the sqlite dump, manipulates it and | |
returns the list containing the psql compatible insert statement. | |
''' | |
global BOUNDARY | |
#First fix the column name issue. | |
insert_lines[0] = fix_column_names(insert_lines[0]) | |
if 'bool' in COLUMN_TYPES: | |
insert_stmt = BOUNDARY.join(insert_lines) | |
insert_stmt = fix_bool(insert_stmt) | |
insert_lines = insert_stmt.split(BOUNDARY) | |
return insert_lines | |
def process_dump(input_file,output_file): | |
''' | |
Process the file lazily line by line | |
''' | |
def process_insert(insert_lines): | |
''' | |
Helper method to write psql commands into output_file | |
''' | |
psql_inserts = get_psql_inserts(insert_lines) | |
output_file.writelines(psql_inserts) | |
global COLUMNS | |
global COLUMN_NAMES | |
global COLUMN_TYPES | |
after_pragma = False #The first few lines will be schema info upto the | |
#line that starts with "PRAGMA" | |
insert_started = False | |
insert_lines = [] | |
insert_stmt_start = 'INSERT' | |
for line in input_file: | |
#Get the schema info from the head of the dump file | |
if not after_pragma: | |
if line[0].isdigit(): | |
COLUMNS.append(tuple(line.split('|')[1:3])) | |
elif line.startswith('PRAGMA'): | |
after_pragma = True | |
COLUMN_NAMES = str(tuple([name for name,datatype in COLUMNS])) | |
COLUMN_TYPES = tuple([datatype for name,datatype in COLUMNS]) | |
#Python uses single quotes for enclosing a string. | |
#But psql uses double quotes on "column names" and | |
#single quotes on strings inside VALUES(..) | |
COLUMN_NAMES = ' ' + COLUMN_NAMES.replace("'",'"') | |
continue | |
#Ignore the lines from PRAGMA and before INSERT. | |
if not insert_started: | |
if line.startswith('CREATE TABLE'): | |
table_name = line[line.index('"'):] | |
table_name = table_name[:table_name.index('"',1)+1] # '"table_name"' | |
insert_stmt_start = 'INSERT INTO ' + table_name | |
elif line.startswith('INSERT'): | |
insert_started = True | |
else: continue | |
#If the control reaches here, it must mean that the first insert statement | |
#has appeared. But the insert statements may span multiple lines. So, we | |
#collect those lines and process them. | |
if line.startswith(insert_stmt_start): | |
if insert_lines: #True from 2nd insert statement | |
process_insert(insert_lines) #Insert the previous insert statement | |
insert_lines = [line] #and append the current one | |
elif insert_lines: | |
insert_lines.append(line) | |
if not insert_lines: return | |
while insert_lines[-1].endswith(';\n') and \ | |
(insert_lines[-1].startswith('CREATE INDEX') or \ | |
insert_lines[-1].startswith('COMMIT')): | |
insert_lines.pop() #remove the create index and commit lines at the end | |
process_insert(insert_lines) #fix the last insert statement | |
if __name__ == '__main__': | |
if len(sys.argv) != 2: | |
usage() | |
filename = sys.argv[1] | |
output_filename = filename + '.psql' | |
if not os.path.isfile(filename): | |
print "FATAL: Not a valid filename" | |
usage() | |
print sys.argv[0], ': Trying to convert', sys.argv[1] | |
try: | |
input_file = open(filename,'r') | |
output_file = open(output_filename,'w') | |
process_dump(input_file,output_file) | |
finally: | |
input_file.close() | |
output_file.close() | |
print sys.argv[0], ': Converted to', output_filename | |
If you want a psql compatible dump for a single table, then use these commands from inside the sqlite3 shell.
.output tablename.dump
pragma table_info('tablename');
.dump tablename
.quit
Then execute
python sqlite_to_postgres.py tablename.dump
Thank you for the sqlite3 to postgressql script.
I am migrating a django project. I appreciate you writing this. It worked perfectly!
Thank you
Traceback (most recent call last):
File "sqlite_to_postgres.py", line 180, in <module>
process_dump(input_file,output_file)
File "sqlite_to_postgres.py", line 137, in process_dump
table_name = line[line.index('"'):]
ValueError: substring not found
Changed lines 136 and 137 to this:
table_name = line[line.index('['):]
table_name = table_name[:table_name.index(']',1)+1]
My sqlite3 commands were not quoting the table names, instead it uses brackets. This fixes that.
awesome. I found this actually also trying to convert a sqlite reviewboard database into postgres!
How did you know the right order to import all the separate dump files (one per table) without violating foreign key constraints?
FYI. Found a way to do import them in the right order.
- Output the schema separately from sqlite
- Manually edit schema and keep trying to import until the order of the tables imports correctly w/o foreign key reference errors
- A combination of grep/sed on that schema.sql to extract out the table names in order
- Some more shell scripting on those table names to concatenate your various dump files into the right order
In case this is helpful to someone else...
cat schema.sql | grep "CREATE TABLE" | sed -e "s/CREATE TABLE \"//" | sed -e "s/\".*//" > table-order.txt
echo "BEGIN TRANSACTION;" > whole-dump.sql
for t in `cat table-order.txt`; do
cat dumps/$t.dump.sql >> whole-dump.sql
done
echo "COMMIT;" >> whole-dump.sql
@matthauck I just disabled the constraints temporarily in a transaction. I concatenated all the sql commands into one file and wrapped the contents like the following..
begin;
SET CONSTRAINTS ALL DEFERRED;
<ALL DUMP COMMANDS>
commit;
Then imported this dump in one go! Make sure you test this plenty of times on backup data and setup.
Reference: http://www.openscope.net/2012/08/23/subverting-foreign-key-constraints-in-postgres-or-mysql/
awesome, thanks, this works great! Just one thing: You'll also probably need to reset all your sequences. For that, https://wiki.postgresql.org/wiki/Fixing_Sequences has a generic solution.
This was a life saver. Thanks a lot!
Just a note, I was using this to export a Grafana SQLite3 DB, ran into the same error as @smclenithan - Grafana uses backticks (`) instead of quotations marks for table names, so:
Change lines 136 and 137 to:
table_name = line[line.index('`'):]
table_name = table_name[:table_name.index('`',1)+1
And it works.
Cheers
@gl1tchh and @smclenithan offers own fixes for "substring not found" error but both is useless in my case.
My environment:
SQLite version 3.8.7.1
Debian GNU/Linux 8
My sqlite3 dump exports CREATE TABLE table_name without "quotations" or backticks
or any other separator:
CREATE TABLE v_users (
...
INSERT INTO "v_users" VALUES(
...
So, I check how python works with string (I'm not a programmer) and make own fix for searching tablename in the CREATE TABLE string:
if line.startswith('CREATE TABLE'):
table_name = line[13:] #--modified original script line 136
table_name = table_name[:table_name.index(' ')] # '"table_name"' #--modified original script line 137
insert_stmt_start = 'INSERT INTO "' + table_name + '"' #--modified original script line 138
Explaining:
String part "CREATE TABLE" has 12 chars and with 1 white space we got a 13 characters. So our table_name located after 13 character (line 136) and before the next white space (line 137). But also we must add quotations in the line 138, because our table_name is not quoted, but 'INSERT INTO ' command must have quotations for table_name.
There was a bug before when the dump files had crazy sql statements like these:
Fixed that bug :)