Skip to content

Instantly share code, notes, and snippets.

@eduardonunesp
Created January 13, 2014 17:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eduardonunesp/8404905 to your computer and use it in GitHub Desktop.
Save eduardonunesp/8404905 to your computer and use it in GitHub Desktop.
The idea is make easy to migrate of a huge mysql table and translate to Django ORM (incomplete)
import re
import sys
CREATE_TABLE_STATEMENT = re.compile('CREATE\s+TABLE\s+(IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?\s+\(', re.I)
END_CREATE_TABLE_STATEMENT = re.compile('\)\s*(ENGINE\s*=\s*(\w+))?\s*;', re.I)
STRUCTURE_FIELD = re.compile('\s+((?!UNIQUE)(?!PRIMARY)(?!KEY)(?!INDEX)`?\w+`?.*),', re.I)
CONSTRAING_FIELD = re.compile('\s+(UNIQUE.*|KEY.*|INDEX.*),?', re.I)
EXTRACT_VARCHAR_TYPE = re.compile('`?\w+`?\s+(VARCHAR\((\d+)\))\s+', re.I)
EXTRACT_INT_TYPE = re.compile('`?\w+`?\s+(INT\((\d+)\))\s+', re.I)
EXTRACT_ENUM_TYPE = re.compile('`?\w+`?\s+(ENUM\((.*)\))\s+', re.I)
EXTRACT_FIELD_NAME = re.compile('(?!ID)(?!UNIQUE)(?!PRIMARY)(?!KEY)`?(\w+)`?\s+', re.I)
EXTRACT_OPTIONS = re.compile('^.*(DEFAULT\s+NULL|NOT\s+NULL).*$', re.I)
STATE_SEARCHING_CREATE_STATEMENT = 'STATE_SEARCHING_CREATE_STATEMENT'
STATE_SEARCHING_STRUCTURE_FIELDS = 'STATE_SEARCHING_STRUCTURE_FIELDS'
STATE_SEARCHING_END_CREATE_STATEMENT = 'STATE_SEARCHING_END_CREATE_STATEMENT'
CLASS_TPL = "class %s(models.Model):"
FIELD_TPL = "%s = %s"
CHAR_FIELD_TPL = 'models.CharField(max_length="%s", blank=%s)'
INT_FIELD_TPL = 'models.IntegerField(blank=%s)'
ENUM_FIELD_TPL = 'EnumField(values=(%s), blank=%s)'
state = STATE_SEARCHING_CREATE_STATEMENT
def search_create_statement(line):
global state
match_create_statement = CREATE_TABLE_STATEMENT.match(line)
if match_create_statement:
state = STATE_SEARCHING_STRUCTURE_FIELDS
table_name = match_create_statement.groups()[-1]
print 'found table: %s' % table_name
return table_name
def search_structure_fields(line):
global state
match_structure_field = STRUCTURE_FIELD.match(line)
if match_structure_field:
field = match_structure_field.groups()[0]
print 'found structure field: %s' % field
return ('structure', field)
match_constraint_statment = CONSTRAING_FIELD.match(line)
if match_constraint_statment:
field = match_constraint_statment.groups()[0]
print 'found constraint field: %s' % field
return ('constraint', field)
def search_end_statement(line):
global state
match_end_create_statement = END_CREATE_TABLE_STATEMENT.match(line)
if match_end_create_statement:
state = STATE_SEARCHING_CREATE_STATEMENT
return True
return False
def parse_field_name(field):
field_match_res = EXTRACT_FIELD_NAME.match(field)
if field_match_res:
name = field_match_res.groups()[-1]
if name != 'id':
return name
def parse_field_type(field):
field_match_res = EXTRACT_VARCHAR_TYPE.match(field)
if field_match_res:
size = field_match_res.groups()[-1]
return ('varchar', size)
field_match_res = EXTRACT_INT_TYPE.match(field)
if field_match_res:
size = field_match_res.groups()[-1]
return ('int', size)
field_match_res = EXTRACT_ENUM_TYPE.match(field)
if field_match_res:
opts = field_match_res.groups()[-1]
return ('enum', opts)
print 'Error: Cannot find field type for %s' % field
exit(1)
def parse_field_options(field):
field_match_res = EXTRACT_OPTIONS.match(field)
if field_match_res:
is_null = field_match_res.groups()[-1]
if is_null.lower() == 'default null':
return True
return False
def parse_constraint(field):
print field
def create_class(tables):
django_classes = []
for table, fields in tables.items():
django_class = CLASS_TPL % table
for field in fields:
structure_type, field = field
if structure_type == 'structure':
name = parse_field_name(field)
if not name:
continue
field_type, val = parse_field_type(field)
blank = parse_field_options(field)
django_field = None
if field_type == 'varchar':
django_field = CHAR_FIELD_TPL % (val, blank)
elif field_type == 'int':
django_field = INT_FIELD_TPL % (blank)
elif field_type == 'enum':
django_field = ENUM_FIELD_TPL % (val, blank)
else:
print 'unknow field type for %s' % name
django_class += "\n " + FIELD_TPL % (name, django_field)
django_classes.append(django_class)
print "\n\n".join(django_classes)
if __name__ == '__main__':
if len(sys.argv) < 2:
print 'Need mysql dump file (.sql)'
file_name = sys.argv[1]
file_type = file_name.split('.')[-1]
if file_type != 'sql':
print 'File type need to be .sql'
table_name = None
fields = []
tables = {}
sql_buffer = open(file_name, 'r').readlines()
for line in sql_buffer:
if state == STATE_SEARCHING_CREATE_STATEMENT:
table_name_res = search_create_statement(line)
if table_name_res:
table_name = table_name_res
if state == STATE_SEARCHING_STRUCTURE_FIELDS:
field_name_res = search_structure_fields(line)
if field_name_res:
fields.append(field_name_res)
if search_end_statement(line):
print 'end table %s' % table_name
tables[table_name] = fields
create_class(tables)
print 'Last state %s' % state
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment