Created
January 13, 2014 17:58
-
-
Save eduardonunesp/8404905 to your computer and use it in GitHub Desktop.
The idea is make easy to migrate of a huge mysql table and translate to Django ORM (incomplete)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import sys | |
CREATE_TABLE_STATEMENT = re.compile('CREATE\s+TABLE\s+(IF\s+NOT\s+EXISTS\s+)?`?(\w+)`?\s+\(', re.I) | |
END_CREATE_TABLE_STATEMENT = re.compile('\)\s*(ENGINE\s*=\s*(\w+))?\s*;', re.I) | |
STRUCTURE_FIELD = re.compile('\s+((?!UNIQUE)(?!PRIMARY)(?!KEY)(?!INDEX)`?\w+`?.*),', re.I) | |
CONSTRAING_FIELD = re.compile('\s+(UNIQUE.*|KEY.*|INDEX.*),?', re.I) | |
EXTRACT_VARCHAR_TYPE = re.compile('`?\w+`?\s+(VARCHAR\((\d+)\))\s+', re.I) | |
EXTRACT_INT_TYPE = re.compile('`?\w+`?\s+(INT\((\d+)\))\s+', re.I) | |
EXTRACT_ENUM_TYPE = re.compile('`?\w+`?\s+(ENUM\((.*)\))\s+', re.I) | |
EXTRACT_FIELD_NAME = re.compile('(?!ID)(?!UNIQUE)(?!PRIMARY)(?!KEY)`?(\w+)`?\s+', re.I) | |
EXTRACT_OPTIONS = re.compile('^.*(DEFAULT\s+NULL|NOT\s+NULL).*$', re.I) | |
STATE_SEARCHING_CREATE_STATEMENT = 'STATE_SEARCHING_CREATE_STATEMENT' | |
STATE_SEARCHING_STRUCTURE_FIELDS = 'STATE_SEARCHING_STRUCTURE_FIELDS' | |
STATE_SEARCHING_END_CREATE_STATEMENT = 'STATE_SEARCHING_END_CREATE_STATEMENT' | |
CLASS_TPL = "class %s(models.Model):" | |
FIELD_TPL = "%s = %s" | |
CHAR_FIELD_TPL = 'models.CharField(max_length="%s", blank=%s)' | |
INT_FIELD_TPL = 'models.IntegerField(blank=%s)' | |
ENUM_FIELD_TPL = 'EnumField(values=(%s), blank=%s)' | |
state = STATE_SEARCHING_CREATE_STATEMENT | |
def search_create_statement(line): | |
global state | |
match_create_statement = CREATE_TABLE_STATEMENT.match(line) | |
if match_create_statement: | |
state = STATE_SEARCHING_STRUCTURE_FIELDS | |
table_name = match_create_statement.groups()[-1] | |
print 'found table: %s' % table_name | |
return table_name | |
def search_structure_fields(line): | |
global state | |
match_structure_field = STRUCTURE_FIELD.match(line) | |
if match_structure_field: | |
field = match_structure_field.groups()[0] | |
print 'found structure field: %s' % field | |
return ('structure', field) | |
match_constraint_statment = CONSTRAING_FIELD.match(line) | |
if match_constraint_statment: | |
field = match_constraint_statment.groups()[0] | |
print 'found constraint field: %s' % field | |
return ('constraint', field) | |
def search_end_statement(line): | |
global state | |
match_end_create_statement = END_CREATE_TABLE_STATEMENT.match(line) | |
if match_end_create_statement: | |
state = STATE_SEARCHING_CREATE_STATEMENT | |
return True | |
return False | |
def parse_field_name(field): | |
field_match_res = EXTRACT_FIELD_NAME.match(field) | |
if field_match_res: | |
name = field_match_res.groups()[-1] | |
if name != 'id': | |
return name | |
def parse_field_type(field): | |
field_match_res = EXTRACT_VARCHAR_TYPE.match(field) | |
if field_match_res: | |
size = field_match_res.groups()[-1] | |
return ('varchar', size) | |
field_match_res = EXTRACT_INT_TYPE.match(field) | |
if field_match_res: | |
size = field_match_res.groups()[-1] | |
return ('int', size) | |
field_match_res = EXTRACT_ENUM_TYPE.match(field) | |
if field_match_res: | |
opts = field_match_res.groups()[-1] | |
return ('enum', opts) | |
print 'Error: Cannot find field type for %s' % field | |
exit(1) | |
def parse_field_options(field): | |
field_match_res = EXTRACT_OPTIONS.match(field) | |
if field_match_res: | |
is_null = field_match_res.groups()[-1] | |
if is_null.lower() == 'default null': | |
return True | |
return False | |
def parse_constraint(field): | |
print field | |
def create_class(tables): | |
django_classes = [] | |
for table, fields in tables.items(): | |
django_class = CLASS_TPL % table | |
for field in fields: | |
structure_type, field = field | |
if structure_type == 'structure': | |
name = parse_field_name(field) | |
if not name: | |
continue | |
field_type, val = parse_field_type(field) | |
blank = parse_field_options(field) | |
django_field = None | |
if field_type == 'varchar': | |
django_field = CHAR_FIELD_TPL % (val, blank) | |
elif field_type == 'int': | |
django_field = INT_FIELD_TPL % (blank) | |
elif field_type == 'enum': | |
django_field = ENUM_FIELD_TPL % (val, blank) | |
else: | |
print 'unknow field type for %s' % name | |
django_class += "\n " + FIELD_TPL % (name, django_field) | |
django_classes.append(django_class) | |
print "\n\n".join(django_classes) | |
if __name__ == '__main__': | |
if len(sys.argv) < 2: | |
print 'Need mysql dump file (.sql)' | |
file_name = sys.argv[1] | |
file_type = file_name.split('.')[-1] | |
if file_type != 'sql': | |
print 'File type need to be .sql' | |
table_name = None | |
fields = [] | |
tables = {} | |
sql_buffer = open(file_name, 'r').readlines() | |
for line in sql_buffer: | |
if state == STATE_SEARCHING_CREATE_STATEMENT: | |
table_name_res = search_create_statement(line) | |
if table_name_res: | |
table_name = table_name_res | |
if state == STATE_SEARCHING_STRUCTURE_FIELDS: | |
field_name_res = search_structure_fields(line) | |
if field_name_res: | |
fields.append(field_name_res) | |
if search_end_statement(line): | |
print 'end table %s' % table_name | |
tables[table_name] = fields | |
create_class(tables) | |
print 'Last state %s' % state |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment