@vigneshwaranr
Created August 24, 2012 18:34
Script to convert SQLITE dumps into PostgreSQL compatible dumps
#! /bin/sh

usage_error () {
    echo 'Usage: sh migrator.sh <path to sqlite_to_postgres.py> <path to sqlite db file> <an empty dir to output dump files>'
    echo
    echo 'Example:'
    echo '>sh migrator.sh sqlite_to_postgres.py ~/reviewboard.db /tmp/dumps'
    echo
    echo 'Tested on:'
    echo 'Python 2.7.3'
    echo 'SQLite 3.7.9'
}

if [ $# -ne 3 ]
then
    usage_error
    exit 1
fi

if [ ! -r "$1" ]
then
    echo "$1 is not readable."
    echo 'Please give the correct path to sqlite_to_postgres.py'
    exit 1
fi

if [ ! -r "$2" ]
then
    echo "$2 is not readable."
    exit 1
fi

if [ ! -d "$3" ]
then
    echo "$3 is not a valid directory."
    exit 1
fi

# Get the list of tables
echo .tables | sqlite3 "$2" > "$3/lsoftbls"

# Get dumps from sqlite
for i in `cat "$3/lsoftbls"`
do
    echo "Generating sqlite dumps for $i"
    echo ".output $3/$i.dump"        > "$3/dumper"
    echo "pragma table_info('$i');" >> "$3/dumper"
    echo ".dump $i"                 >> "$3/dumper"
    echo '.quit'                    >> "$3/dumper"
    sqlite3 "$2" < "$3/dumper"
done

# Use the python script to convert the sqlite dumps to psql dumps
echo
echo 'Now converting the sqlite dumps into psql format...'
echo
for i in "$3"/*.dump
do
    python "$1" "$i"
done

# Remove the sqlite3 dumps and the file 'lsoftbls'
echo
echo 'Removing temporary files..'
rm "$3"/*.dump
rm "$3/lsoftbls"
rm "$3/dumper"
echo 'Removing empty dump files..'
wc -l "$3"/*.psql | grep -w 0 | awk '{ print $NF }' | xargs rm
echo; echo 'Done.'; echo
echo "Please find the psql dumps at $3"
Sample dump file produced by the script above (pragma table_info output followed by the sqlite3 dump):

0|display_name|varchar(64)|1||0
1|name|varchar(64)|1||0
2|local_site_id|integer|0||0
3|incoming_request_count|integer|0||0
4|invite_only|bool|1||0
5|id|integer|1||1
6|mailing_list|varchar(75)|1||0
7|visible|bool|1||0
PRAGMA foreign_keys=OFF;
BEGIN TRANSACTION;
CREATE TABLE "reviews_group"("display_name" varchar(64) NOT NULL, "name" varchar(64) NOT NULL, "local_site_id" integer NULL, "incoming_request_count" integer NULL, "invite_only" bool NOT NULL, "id" integer NOT NULL UNIQUE PRIMARY KEY, "mailing_list" varchar(75) NOT NULL, "visible" bool NOT NULL);
INSERT INTO "reviews_group" VALUES('Developers','developers',NULL,127,0,1,'',1);
INSERT INTO "reviews_group" VALUES('Testers','testers',NULL,2,0,2,'',1);
INSERT INTO "reviews_group" VALUES('QA','qa',NULL,1,0,3,'',1);
INSERT INTO "reviews_group" VALUES('Release Engineers','releng',NULL,7,0,4,'',1);
INSERT INTO "reviews_group" VALUES('Managers','mgrs',NULL,1,0,5,'',1);
COMMIT;

The same statements converted into PostgreSQL-compatible form (the .psql output):

INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Developers','developers',NULL,127,FALSE,1,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Testers','testers',NULL,2,FALSE,2,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('QA','qa',NULL,1,FALSE,3,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Release Engineers','releng',NULL,7,FALSE,4,'',TRUE);
INSERT INTO "reviews_group" ("display_name", "name", "local_site_id", "incoming_request_count", "invite_only", "id", "mailing_list", "visible") VALUES('Managers','mgrs',NULL,1,FALSE,5,'',TRUE);
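The transformation shown above does two things: it injects the column list taken from the pragma table_info output, and it maps 1/0 to TRUE/FALSE for columns whose declared type is bool. A minimal sketch of that idea (convert_insert is an illustrative helper, not part of the gist; it assumes single-line statements with no commas inside string values, which the real script handles by scanning quotes):

```python
def convert_insert(stmt, columns):
    """Rewrite a sqlite INSERT for psql: add column names, fix booleans.

    `columns` is a list of (name, type) pairs as reported by
    `pragma table_info`. Assumes no commas occur inside string values.
    """
    names = ', '.join('"%s"' % name for name, _ in columns)
    head, _, tail = stmt.partition(' VALUES(')
    # Strip the trailing ');' so we can split the value list on commas.
    values = tail.rstrip().rstrip(';').rstrip(')').split(',')
    for i, (_, coltype) in enumerate(columns):
        if coltype == 'bool':
            values[i] = 'TRUE' if values[i] == '1' else 'FALSE'
    return '%s (%s) VALUES(%s);' % (head, names, ', '.join(values))

cols = [('display_name', 'varchar(64)'), ('invite_only', 'bool'), ('id', 'integer')]
print(convert_insert("INSERT INTO \"reviews_group\" VALUES('QA',0,3);", cols))
```

This mirrors the sample output above: the bool column becomes FALSE while the varchar and integer columns pass through untouched.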
#! /usr/bin/python
# SQLite3 uses 1 and 0, whereas PostgreSQL uses TRUE and FALSE for booleans.
# This python script serves the single purpose of converting sqlite dumps
# into postgres-compatible dumps by converting the boolean values.
import random
import sys
import os.path

BOUNDARY = '%$#@!~R@ND0M^&*()_B0UND@RY<>?:' + str(int(random.random() * (10 ** 10)))
COLUMNS = []
COLUMN_NAMES = ""
COLUMN_TYPES = ()


def usage():
    '''
    Print usage and exit
    '''
    print "Usage: ./sqlite_to_postgres.py <filename.dump>"
    sys.exit()


def fix_column_names(first_line):
    '''
    An insert statement in a sqlite3 dump looks like this:
        INSERT INTO "test" VALUES(1,'Wibble');
    We need to add the column information to the statement:
        INSERT INTO "test" (id,name) VALUES(1,'Wibble');
    This is necessary because the column order may differ in the psql db.
    '''
    index = first_line.index(' VALUES')
    return first_line[:index] + COLUMN_NAMES + first_line[index:]


def fix_bool(stmt):
    from_here = 'VALUES('
    start_pos = stmt.index(from_here) + len(from_here)
    cur_pos = start_pos
    newstmt = stmt[:start_pos]  # [INSERT ... VALUES(]
    stmtlen = len(stmt)
    no_of_cols = len(COLUMN_TYPES)
    for i in range(0, no_of_cols):
        if COLUMN_TYPES[i] == 'bool':
            newstmt += stmt[start_pos:cur_pos]  # nothing happens if both are the same
            if stmt[cur_pos] == '1':
                newstmt += 'TRUE'
            elif stmt[cur_pos] == '0':
                newstmt += 'FALSE'
            if i == no_of_cols - 1:  # i.e. last column
                newstmt += ');\n'
                break
            newstmt += ','  # not the last column
            cur_pos += 2
            start_pos = cur_pos
        else:
            if i == no_of_cols - 1:  # if it's the last non-bool column,
                newstmt += stmt[start_pos:]  # simply append everything that's left
                break  # and leave
            if stmt[cur_pos] != "'":
                for cur_pos in range(cur_pos + 1, stmtlen):
                    if stmt[cur_pos] == ',':
                        cur_pos += 1
                        break  # the inner loop and go to the next column
            else:  # the 'problematic' place: cur_pos is on "'"
                cur_pos += 1  # what's next after "'"?
                while cur_pos < stmtlen:
                    if stmt[cur_pos] == "'":
                        if stmt[cur_pos + 1] == "'":  # ignore escaped quote ('')
                            cur_pos += 2
                            continue  # searching
                        elif stmt[cur_pos + 1] == ",":  # end of string
                            cur_pos += 2
                            break  # to the next column
                    cur_pos += 1
    return newstmt


def get_psql_inserts(insert_lines):
    '''
    Takes a list of one or more lines that together constitute a single
    insert statement from the sqlite dump, manipulates it and returns
    the list containing the psql compatible insert statement.
    '''
    # First fix the column name issue.
    insert_lines[0] = fix_column_names(insert_lines[0])
    if 'bool' in COLUMN_TYPES:
        insert_stmt = BOUNDARY.join(insert_lines)
        insert_stmt = fix_bool(insert_stmt)
        insert_lines = insert_stmt.split(BOUNDARY)
    return insert_lines


def process_dump(input_file, output_file):
    '''
    Process the file lazily, line by line
    '''
    def process_insert(insert_lines):
        '''
        Helper method to write psql commands into output_file
        '''
        psql_inserts = get_psql_inserts(insert_lines)
        output_file.writelines(psql_inserts)

    global COLUMNS
    global COLUMN_NAMES
    global COLUMN_TYPES
    after_pragma = False  # The first few lines will be schema info, up to the
                          # line that starts with "PRAGMA"
    insert_started = False
    insert_lines = []
    insert_stmt_start = 'INSERT'
    for line in input_file:
        # Get the schema info from the head of the dump file
        if not after_pragma:
            if line[0].isdigit():
                COLUMNS.append(tuple(line.split('|')[1:3]))
            elif line.startswith('PRAGMA'):
                after_pragma = True
                COLUMN_NAMES = str(tuple([name for name, datatype in COLUMNS]))
                COLUMN_TYPES = tuple([datatype for name, datatype in COLUMNS])
                # Python uses single quotes for enclosing a string,
                # but psql uses double quotes around "column names" and
                # single quotes on strings inside VALUES(..)
                COLUMN_NAMES = ' ' + COLUMN_NAMES.replace("'", '"')
            continue
        # Ignore the lines from PRAGMA until the first INSERT.
        if not insert_started:
            if line.startswith('CREATE TABLE'):
                table_name = line[line.index('"'):]
                table_name = table_name[:table_name.index('"', 1) + 1]  # '"table_name"'
                insert_stmt_start = 'INSERT INTO ' + table_name
            elif line.startswith('INSERT'):
                insert_started = True
            else:
                continue
        # If control reaches here, the first insert statement has appeared.
        # But insert statements may span multiple lines, so we collect
        # those lines and process them together.
        if line.startswith(insert_stmt_start):
            if insert_lines:  # True from the 2nd insert statement onwards
                process_insert(insert_lines)  # Process the previous insert statement
            insert_lines = [line]  # and collect the current one
        elif insert_lines:
            insert_lines.append(line)
    if not insert_lines:
        return
    while insert_lines[-1].endswith(';\n') and \
            (insert_lines[-1].startswith('CREATE INDEX') or
             insert_lines[-1].startswith('COMMIT')):
        insert_lines.pop()  # remove the CREATE INDEX and COMMIT lines at the end
    process_insert(insert_lines)  # fix the last insert statement


if __name__ == '__main__':
    if len(sys.argv) != 2:
        usage()
    filename = sys.argv[1]
    output_filename = filename + '.psql'
    if not os.path.isfile(filename):
        print "FATAL: Not a valid filename"
        usage()
    print sys.argv[0], ': Trying to convert', sys.argv[1]
    input_file = open(filename, 'r')
    try:
        output_file = open(output_filename, 'w')
        try:
            process_dump(input_file, output_file)
        finally:
            output_file.close()
    finally:
        input_file.close()
    print sys.argv[0], ': Converted to', output_filename
    print
@vigneshwaranr (Author)

There was a bug before when the dump files had crazy SQL statements like these:

INSERT INTO "reviews_comment" VALUES(0,NULL,1,'2011-09-16 01:49:22.901605','Looking at the artifact, the following query seem to be failing:

INSERT INTO etl_job (job_key, job_name, ctf_version, etl_start, etl_end, 
status) values (nextval(''etl_job_key_seq''),''scm_commit_etl'',''6.1.1.0'',''15-Sep-11'',''15-Sep-11'',''1'')

which is what InsertEtlJobSuccessRunForReports uses for both postgres & oracle. Taking an approach as in ctf541_ctf61/InsertEtlJobSuccessRunForReports.java (Revision 52402) should have fixed the issue, was that option tried?',NULL,59,6789,22103,'');

Fixed that bug :)
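The fix works because the value scanner in fix_bool is quote-aware: a comma only separates columns when it appears outside a string literal, and a doubled quote ('') inside a literal is an SQL escape, not the end of the string. That rule is what keeps values containing newlines and SQL-looking text, like the one above, intact. A standalone sketch of the same scanning rule (split_values is an illustrative helper, not in the gist):

```python
def split_values(values):
    """Split the body of VALUES(...) on top-level commas only.

    Commas inside '...' string literals are ignored, and a doubled
    quote ('') inside a literal is treated as an escaped quote, so
    values containing newlines or SQL-looking text stay in one piece.
    """
    parts, buf, i, in_string = [], [], 0, False
    while i < len(values):
        ch = values[i]
        if in_string:
            if ch == "'" and i + 1 < len(values) and values[i + 1] == "'":
                buf.append("''")  # escaped quote: stay inside the literal
                i += 2
                continue
            if ch == "'":
                in_string = False  # literal ends here
        elif ch == "'":
            in_string = True  # literal starts here
        elif ch == ',':
            parts.append(''.join(buf))  # top-level comma: column boundary
            buf = []
            i += 1
            continue
        buf.append(ch)
        i += 1
    parts.append(''.join(buf))
    return parts
```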

@vigneshwaranr (Author)

If you want a psql compatible dump for a single table, then use these commands from inside the sqlite3 shell.

.output tablename.dump
pragma table_info('tablename');
.dump tablename
.quit

Then execute

python sqlite_to_postgres.py tablename.dump
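If you would rather script those shell commands than type them interactively, the same single-table dump can be approximated with Python's stdlib sqlite3 module. This is a sketch, not part of the gist: dump_table is a hypothetical helper, and the crude name filter on iterdump() lines assumes table names are double-quoted in the dump.

```python
import sqlite3

def dump_table(db_path, table, out_path):
    """Write pragma table_info output plus the dump lines for one table,
    mimicking the dump files that migrator.sh feeds to sqlite_to_postgres.py."""
    conn = sqlite3.connect(db_path)
    try:
        with open(out_path, 'w') as out:
            # pragma table_info rows: cid|name|type|notnull|dflt_value|pk
            for row in conn.execute("pragma table_info('%s')" % table):
                out.write('|'.join('' if v is None else str(v) for v in row) + '\n')
            # The converter script uses a PRAGMA line as the schema/dump delimiter.
            out.write('PRAGMA foreign_keys=OFF;\n')
            # iterdump() emits SQL for the whole DB; keep only this table's lines.
            for line in conn.iterdump():
                if '"%s"' % table in line:
                    out.write(line + '\n')
    finally:
        conn.close()
```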

@johnedstone

Thank you for the sqlite3 to PostgreSQL script.
I am migrating a Django project. I appreciate you writing this. It worked perfectly!

@vigneshwaranr (Author)

Thank you

@smclenithan

Traceback (most recent call last):
  File "sqlite_to_postgres.py", line 180, in <module>
    process_dump(input_file,output_file)
  File "sqlite_to_postgres.py", line 137, in process_dump
    table_name = line[line.index('"'):]
ValueError: substring not found

Changed lines 136 and 137 to this:

        table_name = line[line.index('['):]
        table_name = table_name[:table_name.index(']',1)+1] 

My sqlite3 was not quoting the table names; it used brackets instead. This fixes that.

@matthauck

Awesome. I actually found this while also trying to convert a sqlite reviewboard database into postgres!

How did you know the right order to import all the separate dump files (one per table) without violating foreign key constraints?

@matthauck

FYI. Found a way to import them in the right order.

  1. Output the schema separately from sqlite
  2. Manually edit schema and keep trying to import until the order of the tables imports correctly w/o foreign key reference errors
  3. A combination of grep/sed on that schema.sql to extract out the table names in order
  4. Some more shell scripting on those table names to concatenate your various dump files into the right order

In case this is helpful to someone else...

cat schema.sql | grep "CREATE TABLE" | sed -e "s/CREATE TABLE \"//" | sed -e "s/\".*//" > table-order.txt
echo "BEGIN TRANSACTION;" > whole-dump.sql
for t in `cat table-order.txt`; do 
  cat dumps/$t.dump.sql >> whole-dump.sql
done
echo "COMMIT;" >> whole-dump.sql

@vigneshwaranr (Author)

@matthauck I just disabled the constraints temporarily in a transaction. I concatenated all the sql commands into one file and wrapped the contents like the following..

begin;  

SET CONSTRAINTS ALL DEFERRED;  

<ALL DUMP COMMANDS>

commit;

Then imported this dump in one go! Make sure you test this plenty of times on backup data and setup.

Reference: http://www.openscope.net/2012/08/23/subverting-foreign-key-constraints-in-postgres-or-mysql/
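The author's wrapping step can itself be scripted. This sketch (build_wrapped_dump is a hypothetical helper; the .psql file layout is assumed from migrator.sh's output) concatenates the per-table dumps into one transaction; note that SET CONSTRAINTS ALL DEFERRED only defers constraints that were declared DEFERRABLE in PostgreSQL:

```python
import glob
import os

def build_wrapped_dump(dump_dir, out_path):
    """Concatenate every .psql dump in dump_dir into a single transaction
    that defers foreign key checks until COMMIT, as described above."""
    with open(out_path, 'w') as out:
        out.write('begin;\n')
        out.write('SET CONSTRAINTS ALL DEFERRED;\n')
        for path in sorted(glob.glob(os.path.join(dump_dir, '*.psql'))):
            with open(path) as dump:
                out.write(dump.read())
        out.write('commit;\n')
```

As the author says: test this plenty of times on backup data before running it against anything you care about.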

@mreinsch commented Oct 1, 2014

awesome, thanks, this works great! Just one thing: You'll also probably need to reset all your sequences. For that, https://wiki.postgresql.org/wiki/Fixing_Sequences has a generic solution.
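The sequence fix @mreinsch mentions can be generated per table. pg_get_serial_sequence and setval are real PostgreSQL functions; the helper below and the table/column pairs you feed it are illustrative, a sketch of the generic approach on the linked wiki page:

```python
def setval_statements(tables):
    """Emit one setval() statement per (table, id_column) pair so each
    serial sequence continues after the highest imported id."""
    stmt = ("SELECT setval(pg_get_serial_sequence('%(t)s', '%(c)s'), "
            "COALESCE(MAX(%(c)s), 1)) FROM %(t)s;")
    return [stmt % {'t': t, 'c': c} for t, c in tables]
```

Run the emitted statements in psql after the import; without this step the next INSERT on an id-generating column would collide with already-imported rows.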

@azizmb commented Dec 7, 2014

This was a life saver. Thanks a lot!

@gl1tchh commented Nov 19, 2015

Just a note: I was using this to export a Grafana SQLite3 DB and ran into the same error as @smclenithan - Grafana uses backticks (`) instead of quotation marks for table names, so:

Change lines 136 and 137 to:

    table_name = line[line.index('`'):]
    table_name = table_name[:table_name.index('`',1)+1]

And it works.

Cheers

@blessendor

@gl1tchh and @smclenithan offer their own fixes for the "substring not found" error, but neither works in my case.

My environment:
SQLite version 3.8.7.1
Debian GNU/Linux 8

My sqlite3 dump exports CREATE TABLE table_name without quotation marks, backticks, or any other delimiter:

CREATE TABLE v_users (
...
INSERT INTO "v_users" VALUES(
...

So I checked how Python works with strings (I'm not a programmer) and made my own fix for finding the table name in the CREATE TABLE line:

  if line.startswith('CREATE TABLE'):
      table_name = line[13:]                                   #--modified original script line 136
      table_name = table_name[:table_name.index(' ')]          #--modified original script line 137
      insert_stmt_start = 'INSERT INTO "' + table_name + '"'   #--modified original script line 138

Explanation:
The string "CREATE TABLE" is 12 characters, plus one space makes 13, so the table name starts at character 13 (line 136) and ends before the next space (line 137). Line 138 must also add the quotation marks, because the table name is unquoted in the CREATE TABLE statement but the INSERT INTO statements do quote it.
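The three fixes in this thread (double quotes, brackets, backticks, bare names) can be folded into one tolerant extractor. This is a sketch, not in the gist; extract_table_name is a hypothetical helper built on a regex that accepts all four quoting styles:

```python
import re

# CREATE TABLE followed by "name", [name], `name`, or a bare identifier.
_CREATE_RE = re.compile(
    r'CREATE TABLE\s+(?:"([^"]+)"|\[([^\]]+)\]|`([^`]+)`|(\w+))')

def extract_table_name(line):
    """Return the table name from a CREATE TABLE line regardless of the
    quoting style the sqlite3 client used, or None if nothing matches."""
    m = _CREATE_RE.match(line)
    if not m:
        return None
    # Exactly one alternative group matched; return it.
    return next(g for g in m.groups() if g is not None)
```

The INSERT prefix can then be rebuilt uniformly as 'INSERT INTO "' + name + '"', as in @blessendor's fix above.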
