mnpenner/README.md

## README.md

      
    Raw
  

              README.md
            
          
    http://www.commongrounds.co.uk/Library/lib/python2.7/site-packages/mysql/utilities/common/?DA
https://dba.stackexchange.com/a/215330/10786

  
## database.py
#
# Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#

"""
This module contains abstractions of a MySQL Database object used by
multiple utilities.
"""

import multiprocessing
import os
import re
import sys

from collections import deque

from mysql.utilities.exception import UtilError, UtilDBError
from mysql.utilities.common.pattern_matching import REGEXP_QUALIFIED_OBJ_NAME
from mysql.utilities.common.options import obj2sql
from mysql.utilities.common.server import connect_servers, Server
from mysql.utilities.common.user import User
from mysql.utilities.common.sql_transform import (quote_with_backticks,
                                                  remove_backtick_quoting,
                                                  is_quoted_with_backticks)

# List of database objects for enumeration
_DATABASE, _TABLE, _VIEW, _TRIG, _PROC, _FUNC, _EVENT, _GRANT = "DATABASE", \
    "TABLE", "VIEW", "TRIGGER", "PROCEDURE", "FUNCTION", "EVENT", "GRANT"

_OBJTYPE_QUERY = """
    (
       SELECT TABLE_TYPE as object_type
       FROM INFORMATION_SCHEMA.TABLES
       WHERE TABLES.TABLE_SCHEMA = '%(db_name)s' AND
         TABLES.TABLE_NAME = '%(obj_name)s'
    )
    UNION
    (
        SELECT 'TRIGGER' as object_type
        FROM INFORMATION_SCHEMA.TRIGGERS
        WHERE TRIGGER_SCHEMA = '%(db_name)s' AND
          TRIGGER_NAME = '%(obj_name)s'
    )
    UNION
    (
        SELECT TYPE as object_type
        FROM mysql.proc
        WHERE DB = '%(db_name)s' AND NAME = '%(obj_name)s'
    )
    UNION
    (
        SELECT 'EVENT' as object_type
        FROM mysql.event
        WHERE DB = '%(db_name)s' AND NAME = '%(obj_name)s'
    )
"""

_DEFINITION_QUERY = """
  SELECT %(columns)s
  FROM INFORMATION_SCHEMA.%(table_name)s WHERE %(conditions)s
"""

_PARTITION_QUERY = """
  SELECT PARTITION_NAME, SUBPARTITION_NAME, PARTITION_ORDINAL_POSITION,
         SUBPARTITION_ORDINAL_POSITION, PARTITION_METHOD, SUBPARTITION_METHOD,
         PARTITION_EXPRESSION, SUBPARTITION_EXPRESSION, PARTITION_DESCRIPTION
  FROM INFORMATION_SCHEMA.PARTITIONS
  WHERE TABLE_SCHEMA = '%(db)s' AND TABLE_NAME = '%(name)s'
"""

_COLUMN_QUERY = """
  SELECT ORDINAL_POSITION, COLUMN_NAME, COLUMN_TYPE, IS_NULLABLE,
         COLUMN_DEFAULT, EXTRA, COLUMN_COMMENT, COLUMN_KEY
  FROM INFORMATION_SCHEMA.COLUMNS
  WHERE TABLE_SCHEMA = '%(db)s' AND TABLE_NAME = '%(name)s'
"""

_FK_CONSTRAINT_QUERY = """
SELECT TABLE_NAME, CONSTRAINT_NAME, COLUMN_NAME, REFERENCED_TABLE_SCHEMA,
REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME, UPDATE_RULE, DELETE_RULE
FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS
JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE
USING (CONSTRAINT_SCHEMA, CONSTRAINT_NAME, TABLE_NAME, REFERENCED_TABLE_NAME)
WHERE CONSTRAINT_SCHEMA = '{DATABASE!s}'
AND TABLE_NAME = '{TABLE!s}'
"""

_ALTER_TABLE_ADD_FK_CONSTRAINT = """
ALTER TABLE {DATABASE!s}.{TABLE!s} add CONSTRAINT `{CONSTRAINT_NAME!s}`
FOREIGN KEY (`{COLUMN_NAMES}`)
REFERENCES `{REFERENCED_DATABASE}`.`{REFERENCED_TABLE!s}`
(`{REFERENCED_COLUMNS!s}`)
ON UPDATE {UPDATE_RULE}
ON DELETE {DELETE_RULE}
"""


def _multiprocess_tbl_copy_task(copy_tbl_task):
    """Multiprocess copy table data method.

    This method wraps the copy of the table's data to allow its concurrent
    execution by a pool of processes.

    copy_tbl_task[in]   dictionary of values required by a process to
                        perform the table copy task, namely:
                        'source_srv': <dict with source connections values>,
                        'dest_srv': <dict with destination connections values>,
                        'source_db': <source database name>,
                        'destination_db': <destination database name>,
                        'table': <table to copy>,
                        'options': <dict of options>,
                        'cloning': <cloning flag>,
                        'connections': <number of concurrent connections>,
                        'q_source_db': <quoted source database name>.
    """
    # Get input to execute task.
    source_srv = copy_tbl_task.get('source_srv')
    dest_srv = copy_tbl_task.get('dest_srv')
    source_db = copy_tbl_task.get('source_db')
    target_db = copy_tbl_task.get('target_db')
    table = copy_tbl_task.get('table')
    options = copy_tbl_task.get('options')
    cloning = copy_tbl_task.get('cloning')
    # Execute copy table task.
    # NOTE: Must handle any exception here, because worker processes will not
    # propagate them to the main process.
    try:
        _copy_table_data(source_srv, dest_srv, source_db, target_db, table,
                         options, cloning)
    except UtilError:
        _, err, _ = sys.exc_info()
        print("ERROR copying data for table '{0}': {1}".format(table,
                                                               err.errmsg))


def _copy_table_data(source_srv, destination_srv, db_name, new_db_name,
                     tbl_name, tbl_options, cloning, connections=1):
    """Copy the data of the specified table.

    This method copies/clones all the data from a table to another (new)
    database.

    source_srv[in]      Source server (Server instance or dict. with the
                        connection values).
    destination_srv[in] Destination server (Server instance or dict. with the
                        connection values).
    db_name[in]         Name of the database with the table to copy.
    new_db_name[in]     Name of the destination database to copy the table.
    tbl_name[in]        Name of the table to copy.
    tbl_options[in]     Table options.
    cloning[in]         Cloning flag, in order to use a different method to
                        copy data on the same server
    connections[in]     Specify the use of multiple connections/processes to
                        copy the table data (rows). By default, only 1 used.
                        Note: Multiprocessing option should be preferred.
    """
    # Import table needed here to avoid circular import issues.
    from mysql.utilities.common.table import Table
    # Handle source and destination server instances or connection values.
    # Note: For multiprocessing the use of connection values instead of a
    # server instance is required to avoid internal errors.
    if isinstance(source_srv, Server):
        source = source_srv
    else:
        # Get source server instance from connection values.
        conn_options = {
            'quiet': True,  # Avoid repeating output for multiprocessing.
            'version': "5.1.30",
        }
        servers = connect_servers(source_srv, None, conn_options)
        source = servers[0]
    if isinstance(destination_srv, Server):
        destination = destination_srv
    else:
        # Get source server instance from connection values.
        conn_options = {
            'quiet': True,  # Avoid repeating output for multiprocessing.
            'version': "5.1.30",
        }
        servers = connect_servers(destination_srv, None, conn_options)
        destination = servers[0]

    # Copy table data.
    if not tbl_options.get("quiet", False):
        print("# Copying data for TABLE {0}.{1}".format(db_name,
                                                        tbl_name))
    q_tbl_name = "{0}.{1}".format(quote_with_backticks(db_name),
                                  quote_with_backticks(tbl_name))
    tbl = Table(source, q_tbl_name, tbl_options)
    if tbl is None:
        raise UtilDBError("Cannot create table object before copy.", -1,
                          db_name)
    tbl.copy_data(destination, cloning, new_db_name, connections)


class Database(object):
    """
    The Database class encapsulates a database. The class has the following
    capabilities:

        - Check to see if the database exists
        - Drop the database
        - Create the database
        - Clone the database
        - Print CREATE statements for all objects
    """
    obj_type = _DATABASE

    def __init__(self, source, name, options=None):
        """Constructor

        source[in]         A Server object
        name[in]           Name of database
        verbose[in]        print extra data during operations (optional)
                           default value = False
        options[in]        Array of options for controlling what is included
                           and how operations perform (e.g., verbose)
        """
        if options is None:
            options = {}
        self.source = source
        # Keep database identifier considering backtick quotes
        if is_quoted_with_backticks(name):
            self.q_db_name = name
            self.db_name = remove_backtick_quoting(self.q_db_name)
        else:
            self.db_name = name
            self.q_db_name = quote_with_backticks(self.db_name)
        self.verbose = options.get("verbose", False)
        self.skip_tables = options.get("skip_tables", False)
        self.skip_views = options.get("skip_views", False)
        self.skip_triggers = options.get("skip_triggers", False)
        self.skip_procs = options.get("skip_procs", False)
        self.skip_funcs = options.get("skip_funcs", False)
        self.skip_events = options.get("skip_events", False)
        self.skip_grants = options.get("skip_grants", False)
        self.skip_create = options.get("skip_create", False)
        self.skip_data = options.get("skip_data", False)
        self.exclude_patterns = options.get("exclude_patterns", None)
        self.use_regexp = options.get("use_regexp", False)
        self.skip_table_opts = options.get("skip_table_opts", False)
        self.new_db = None
        self.q_new_db = None
        self.init_called = False
        self.destination = None  # Used for copy mode
        self.cloning = False    # Used for clone mode
        self.query_options = {  # Used for skipping buffered fetch of rows
            'fetch': False,
            'commit': False,  # No COMMIT needed for DDL operations (default).
        }
        self.constraints = deque()  # Used to store constraints to execute
                                    # after table creation, deque is
                                    # thread-safe

        self.objects = []
        self.new_objects = []

    def exists(self, server=None, db_name=None):
        """Check to see if the database exists

        server[in]         A Server object
                           (optional) If omitted, operation is performed
                           using the source server connection.
        db_name[in]        database name
                           (optional) If omitted, operation is performed
                           on the class instance table name.

        return True = database exists, False = database does not exist
        """

        if not server:
            server = self.source
        db = None
        if db_name:
            db = db_name
        else:
            db = self.db_name

        _QUERY = """
            SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA
            WHERE SCHEMA_NAME = '%s'
        """
        res = server.exec_query(_QUERY % db)
        return (res is not None and len(res) >= 1)

    def drop(self, server, quiet, db_name=None):
        """Drop the database

        server[in]         A Server object
        quiet[in]          ignore error on drop
        db_name[in]        database name
                           (optional) If omitted, operation is performed
                           on the class instance table name.

        return True = database successfully dropped, False = error
        """

        db = None
        if db_name:
            db = db_name if is_quoted_with_backticks(db_name) \
                else quote_with_backticks(db_name)
        else:
            db = self.q_db_name
        op_ok = False
        if quiet:
            try:
                server.exec_query("DROP DATABASE %s" % (db),
                                  self.query_options)
                op_ok = True
            except:
                pass
        else:
            server.exec_query("DROP DATABASE %s" % (db),
                              self.query_options)
            op_ok = True
        return op_ok

    def create(self, server, db_name=None, charset_name=None,
               collation_name=None):
        """Create the database

        server[in]         A Server object
        db_name[in]        database name
                           (optional) If omitted, operation is performed
                           on the class instance table name.

        return True = database successfully created, False = error
        """

        if db_name:
            db = db_name if is_quoted_with_backticks(db_name) \
                else quote_with_backticks(db_name)
        else:
            db = self.q_db_name

        specification = ""
        if charset_name:
            specification = " DEFAULT CHARACTER SET {0}".format(charset_name)
        if collation_name:
            specification = "{0} DEFAULT COLLATE {0}".format(specification,
                                                             collation_name)
        query_create_db = "CREATE DATABASE {0} {1}".format(db, specification)
        server.exec_query(query_create_db, self.query_options)

        return True

    def __make_create_statement(self, obj_type, obj):
        """Construct a CREATE statement for a database object.

        This method will get the CREATE statement from the method
        get_create_statement() and also replace all occurrances of the
        old database name with the new.

        obj_type[in]       Object type (string) e.g. DATABASE
        obj[in]            A row from the get_db_objects() method
                           that contains the elements of the object

        Note: This does not work for tables.

        Returns the CREATE string
        """

        if not self.new_db:
            self.new_db = self.db_name
            self.q_new_db = self.q_db_name
        create_str = None
        # Tables are not supported
        if obj_type == _TABLE and self.cloning:
            return None
        # Grants are a different animal!
        if obj_type == _GRANT:
            if obj[3]:
                create_str = "GRANT %s ON %s.%s TO %s" % \
                             (obj[1], self.q_new_db, obj[3], obj[0])
            else:
                create_str = "GRANT %s ON %s.* TO %s" % \
                             (obj[1], self.q_new_db, obj[0])
        else:
            create_str = self.get_create_statement(self.db_name,
                                                   obj[0], obj_type)
            if self.new_db != self.db_name:
                # Replace the occurrences of the old database name (quoted with
                # backticks) with the new one when preceded by: a whitespace
                # character, comma or optionally a left parentheses.
                create_str = re.sub(
                    r"(\s|,)(\(?){0}\.".format(self.q_db_name),
                    r"\1\2{0}.".format(self.q_new_db),
                    create_str
                )
                # Replace the occurrences of the old database name (without
                # backticks) with the new one when preceded by: a whitespace
                # character, comma or optionally a left parentheses and
                # surrounded by single or double quotes.
                create_str = re.sub(
                    r"(\s|,)(\(?)(\"|\'?){0}(\"|\'?)\.".format(self.db_name),
                    r"\1\2\3{0}\4.".format(self.new_db),
                    create_str
                )
        return create_str

    def __add_db_objects(self, obj_type):
        """Get a list of objects from a database based on type.

        This method retrieves the list of objects for a specific object
        type and adds it to the class' master object list.

        obj_type[in]       Object type (string) e.g. DATABASE
        """

        rows = self.get_db_objects(obj_type)
        if rows:
            for row in rows:
                tup = (obj_type, row)
                self.objects.append(tup)

    def init(self):
        """Get all objects for the database based on options set.

        This method initializes the database object with a list of all
        objects except those object types that are excluded. It calls
        the helper method self.__add_db_objects() for each type of
        object.

        NOTE: This method must be called before the copy method. A
              guard is in place to ensure this.
        """
        self.init_called = True
        # Get tables
        if not self.skip_tables:
            self.__add_db_objects(_TABLE)
        # Get functions
        if not self.skip_funcs:
            self.__add_db_objects(_FUNC)
        # Get stored procedures
        if not self.skip_procs:
            self.__add_db_objects(_PROC)
        # Get views
        if not self.skip_views:
            self.__add_db_objects(_VIEW)
        # Get triggers
        if not self.skip_triggers:
            self.__add_db_objects(_TRIG)
        # Get events
        if not self.skip_events:
            self.__add_db_objects(_EVENT)
        # Get grants
        if not self.skip_grants:
            self.__add_db_objects(_GRANT)

    def __drop_object(self, obj_type, name):
        """Drop a database object.

        Attempts a quiet drop of a database object (no errors are
        printed).

        obj_type[in]       Object type (string) e.g. DATABASE
        name[in]           Name of the object
        """

        if self.verbose:
            print "# Dropping new object %s %s.%s" % \
                  (obj_type, self.new_db, name)
        drop_str = "DROP %s %s.%s" % \
                   (obj_type, self.q_new_db, name)
        # Suppress the error on drop
        if self.cloning:
            try:
                self.source.exec_query(drop_str, self.query_options)
            except UtilError:
                if self.verbose:
                    print("# WARNING: Unable to drop {0} from {1} database "
                          "(object may not exist): {2}".format(name,
                                                               "source",
                                                               drop_str))
        else:
            try:
                self.destination.exec_query(drop_str, self.query_options)
            except UtilError:
                if self.verbose:
                    print("# WARNING: Unable to drop {0} from {1} database "
                          "(object may not exist): {2}".format(name,
                                                               "destination",
                                                               drop_str))

    def __create_object(self, obj_type, obj, show_grant_msg,
                        quiet=False, new_engine=None, def_engine=None):
        """Create a database object.

        obj_type[in]       Object type (string) e.g. DATABASE
        obj[in]            A row from the get_db_object_names() method
                           that contains the elements of the object
        show_grant_msg[in] If true, display diagnostic information
        quiet[in]          do not print informational messages
        new_engine[in]     Use this engine if not None for object
        def_engine[in]     If target storage engine doesn't exist, use
                           this engine.

        Note: will handle exception and print error if query fails
        """
        if obj_type == _TABLE and self.cloning:
            obj_name = quote_with_backticks(obj[0])
            create_list = ["CREATE TABLE {0!s}.{1!s} LIKE {2!s}.{1!s}".format(
                self.q_new_db, obj_name, self.q_db_name)
            ]
        else:
            create_list = [self.__make_create_statement(obj_type, obj)]
        if obj_type == _TABLE:
            may_skip_fk = False  # Check possible issues with FK Constraints
            obj_name = quote_with_backticks(obj[0])
            tbl_name = "%s.%s" % (self.q_new_db, obj_name)
            create_list = self.destination.substitute_engine(tbl_name,
                                                             create_list[0],
                                                             new_engine,
                                                             def_engine,
                                                             quiet)

            # Get storage engines from the source table and destination table
            # If the source table's engine is INNODB and the destination is
            # not we will loose any FK constraints that may exist
            src_eng = self.get_object_definition(self.q_db_name,
                                                 obj[0], obj_type)[0][0][2]
            dest_eng = None

            # Information about the engine is always in the last statement of
            # the list, be it a regular create table statement or a create
            # table; alter table statement.
            i = create_list[-1].find("ENGINE=")
            if i > 0:
                j = create_list[-1].find(" ", i)
                dest_eng = create_list[-1][i + 7:j]
            dest_eng = dest_eng or src_eng

            if src_eng.upper() == 'INNODB' and dest_eng.upper() != 'INNODB':
                may_skip_fk = True

        string = "# Copying"
        if not quiet:
            if obj_type == _GRANT:
                if show_grant_msg:
                    print "%s GRANTS from %s" % (string, self.db_name)
            else:
                print "%s %s %s.%s" % \
                      (string, obj_type, self.db_name, obj[0])
            if self.verbose:
                print("; ".join(create_list))

        try:
            self.destination.exec_query("USE %s" % self.q_new_db,
                                        self.query_options)
        except:
            pass
        for stm in create_list:
            try:
                self.destination.exec_query(stm, self.query_options)
            except Exception as e:
                raise UtilDBError("Cannot operate on {0} object."
                                  " Error: {1}".format(obj_type, e.errmsg),
                                  -1, self.db_name)

        # Look for foreign key constraints
        if obj_type == _TABLE:
            params = {
                'DATABASE': self.db_name,
                'TABLE': obj[0],
            }
            try:
                query = _FK_CONSTRAINT_QUERY.format(**params)
                fkey_constr = self.source.exec_query(query)
            except Exception as e:
                raise UtilDBError("Unable to obtain Foreign Key constraint "
                                  "information for table {0}.{1}. "
                                  "Error: {2}".format(self.db_name, obj[0],
                                                      e.errmsg), -1,
                                  self.db_name)

            # Get information about the foreign keys of the table being
            # copied/cloned.
            if fkey_constr and not may_skip_fk:

                # Create a constraint dictionary with the constraint
                # name as key
                constr_dict = {}

                # This list is used to ensure the same constraints are applied
                # in the same order, because iterating the dictionary doesn't
                # offer any guarantees regarding order, and Python 2.6 has
                # no ordered_dict
                constr_lst = []

                for fkey in fkey_constr:
                    params = constr_dict.get(fkey[1])
                    # in case the constraint entry already exists, it means it
                    # is composite, just update the columns names and
                    # referenced column fields
                    if params:
                        params['COLUMN_NAMES'].append(fkey[2])
                        params['REFERENCED_COLUMNS'].append(fkey[5])
                    else:  # else create a new entry
                        constr_lst.append(fkey[1])
                        constr_dict[fkey[1]] = {
                            'DATABASE': self.new_db,
                            'TABLE': fkey[0],
                            'CONSTRAINT_NAME': fkey[1],
                            'COLUMN_NAMES': [fkey[2]],
                            'REFERENCED_DATABASE': fkey[3],
                            'REFERENCED_TABLE': fkey[4],
                            'REFERENCED_COLUMNS': [fkey[5]],
                            'UPDATE_RULE': fkey[6],
                            'DELETE_RULE': fkey[7],
                        }
                # Iterate all the constraints and get the necessary parameters
                # to create the query
                for constr in constr_lst:
                    params = constr_dict[constr]
                    if self.cloning:  # if it is a cloning table operation

                        # In case the foreign key is composite we need to join
                        # the columns to use in in alter table query. Only
                        # useful when cloning
                        params['COLUMN_NAMES'] = '`,`'.join(
                            params['COLUMN_NAMES'])
                        params['REFERENCED_COLUMNS'] = '`,`'.join(
                            params['REFERENCED_COLUMNS'])

                        # If the foreign key points to a table under the
                        # database being cloned, change the referenced database
                        #  name to the new cloned database
                        if params['REFERENCED_DATABASE'] == self.db_name:
                            params['REFERENCED_DATABASE'] = self.new_db
                        else:
                            print("# WARNING: The database being cloned has "
                                  "external Foreign Key constraint "
                                  "dependencies, {0}.{1} depends on {2}."
                                  "{3}".format(params['DATABASE'],
                                               params['TABLE'],
                                               params['REFERENCED_DATABASE'],
                                               params['REFERENCED_TABLE'])
                                  )
                        query = _ALTER_TABLE_ADD_FK_CONSTRAINT.format(**params)

                        # Store constraint query for later execution
                        self.constraints.append(query)
                        if self.verbose:
                            print(query)
                    else:  # if we are copying
                        if params['REFERENCED_DATABASE'] != self.db_name:
                            # if the table being copied has dependencies
                            # to external databases
                            print("# WARNING: The database being copied has "
                                  "external Foreign Key constraint "
                                  "dependencies, {0}.{1} depends on {2}."
                                  "{3}".format(params['DATABASE'],
                                               params['TABLE'],
                                               params['REFERENCED_DATABASE'],
                                               params['REFERENCED_TABLE'])
                                  )
            elif fkey_constr and may_skip_fk:
                print("# WARNING: FOREIGN KEY constraints for table {0}.{1} "
                      "are missing because the new storage engine for "
                      "the table is not InnoDB".format(self.new_db, obj[0]))

    def __apply_constraints(self):
        """This method applies to the database the constraints stored in the
        self.constraints instance variable
        """

        # Enable Foreign Key Checks to prevent the swapping of
        # RESTRICT referential actions with NO ACTION
        query_opts = {'fetch': False, 'commit': False}
        self.destination.exec_query("SET FOREIGN_KEY_CHECKS=1", query_opts)

        # while constraint queue is not empty
        while self.constraints:
            try:
                query = self.constraints.pop()
            except IndexError:
                #queue is empty, exit while statement
                break
            if self.verbose:
                print(query)
            try:
                self.destination.exec_query(query, query_opts)
            except Exception as err:
                raise UtilDBError("Unable to execute constraint query "
                                  "{0}. Error: {1}".format(query, err.errmsg),
                                  -1, self.new_db)

        # Turn Foreign Key Checks off again
        self.destination.exec_query("SET FOREIGN_KEY_CHECKS=0", query_opts)

    def copy_objects(self, new_db, options, new_server=None,
                     connections=1, check_exists=True):
        """Copy the database objects.

        This method will copy a database and all of its objects and data
        to another, new database. Options set at instantiation will determine
        if there are objects that are excluded from the copy. Likewise,
        the method will also skip data if that option was set and process
        an input file with INSERT statements if that option was set.

        The method can also be used to copy a database to another server
        by providing the new server object (new_server). Copy to the same
        name by setting new_db = old_db or as a new database.

        new_db[in]         Name of the new database
        options[in]        Options for copy e.g. force, etc.
        new_server[in]     Connection to another server for copying the db
                           Default is None (copy to same server - clone)
        connections[in]    Number of threads(connections) to use for insert
        check_exists[in]   If True, check for database existence before copy
                           Default is True
        """

        # Must call init() first!
        # Guard for init() prerequisite
        assert self.init_called, "You must call db.init() before " + \
                                 "db.copy_objects()."

        grant_msg_displayed = False

        if new_db:
            # Assign new database identifier considering backtick quotes.
            if is_quoted_with_backticks(new_db):
                self.q_new_db = new_db
                self.new_db = remove_backtick_quoting(new_db)
            else:
                self.new_db = new_db
                self.q_new_db = quote_with_backticks(new_db)
        else:
            # If new_db is not defined use the same as source database.
            self.new_db = self.db_name
            self.q_new_db = self.q_db_name

        self.destination = new_server

        # We know we're cloning if there is no new connection.
        self.cloning = (new_server == self.source)

        if self.cloning:
            self.destination = self.source

        # Check to see if database exists
        if check_exists:
            if self.cloning:
                exists = self.exists(self.source, new_db)
                drop_server = self.source
            else:
                exists = self.exists(self.destination, new_db)
                drop_server = self.destination
            if exists:
                if options.get("force", False):
                    self.drop(drop_server, True, new_db)
                elif not self.skip_create:
                    raise UtilDBError("destination database exists. Use "
                                      "--force to overwrite existing "
                                      "database.", -1, new_db)

        db_name = self.db_name
        definition = self.get_object_definition(db_name, db_name, _DATABASE)
        _, character_set, collation, _ = definition[0]
        # Create new database first
        if not self.skip_create:
            if self.cloning:
                self.create(self.source, new_db, character_set,
                            collation)
            else:
                self.create(self.destination, new_db, character_set,
                            collation)

        # Create the objects in the new database
        for obj in self.objects:
            # Drop object if --force specified and database not dropped
            # Grants do not need to be dropped for overwriting
            if options.get("force", False) and obj[0] != _GRANT:
                obj_name = quote_with_backticks(obj[1][0])
                self.__drop_object(obj[0], obj_name)

            # Create the object
            self.__create_object(obj[0], obj[1], not grant_msg_displayed,
                                 options.get("quiet", False),
                                 options.get("new_engine", None),
                                 options.get("def_engine", None))

            if obj[0] == _GRANT and not grant_msg_displayed:
                grant_msg_displayed = True
        # After object creation, add the constraints
        if self.constraints:
            self.__apply_constraints()

    def copy_data(self, new_db, options, new_server=None, connections=1,
                  src_con_val=None, dest_con_val=None):
        """Copy the data for the tables.

        This method will copy the data for all of the tables to another, new
        database. The method will process an input file with INSERT statements
        if the option was selected by the caller.

        new_db[in]          Name of the new database
        options[in]         Options for copy e.g. force, etc.
        new_server[in]      Connection to another server for copying the db
                            Default is None (copy to same server - clone)
        connections[in]     Number of threads(connections) to use for insert
        src_con_val[in]     Dict. with the connection values of the source
                            server (required for multiprocessing).
        dest_con_val[in]    Dict. with the connection values of the
                            destination server (required for multiprocessing).
        """

        # Must call init() first!
        # Guard for init() prerequisite
        assert self.init_called, "You must call db.init() before " + \
                                 "db.copy_data()."

        if self.skip_data:
            return

        self.destination = new_server

        # We know we're cloning if there is no new connection.
        self.cloning = (new_server == self.source)

        if self.cloning:
            self.destination = self.source

        quiet = options.get("quiet", False)

        tbl_options = {
            'verbose': self.verbose,
            'get_cols': True,
            'quiet': quiet
        }

        copy_tbl_tasks = []
        table_names = [obj[0] for obj in self.get_db_objects(_TABLE)]
        for tblname in table_names:
            # Check multiprocess table copy (only on POSIX systems).
            if options['multiprocess'] > 1 and os.name == 'posix':
                # Create copy task.
                copy_task = {
                    'source_srv': src_con_val,
                    'dest_srv': dest_con_val,
                    'source_db': self.db_name,
                    'target_db': new_db,
                    'table': tblname,
                    'options': tbl_options,
                    'cloning': self.cloning,
                }
                copy_tbl_tasks.append(copy_task)
            else:
                # Copy data from a table (no multiprocessing).
                _copy_table_data(self.source, self.destination, self.db_name,
                                 new_db, tblname, tbl_options, self.cloning)

        # Copy tables concurrently.
        if copy_tbl_tasks:
            # Create process pool.
            workers_pool = multiprocessing.Pool(
                processes=options['multiprocess']
            )
            # Concurrently export tables.
            workers_pool.map_async(_multiprocess_tbl_copy_task, copy_tbl_tasks)
            workers_pool.close()
            # Wait for all task to be completed by workers.
            workers_pool.join()

    def get_create_statement(self, db, name, obj_type):
        """Return the create statement for the object

        db[in]             Database name
        name[in]           Name of the object
        obj_type[in]       Object type (string) e.g. DATABASE
                           Note: this is used to form the correct SHOW command

        Returns create statement
        """
        # Quote database and object name with backticks.
        q_name = (name if is_quoted_with_backticks(name)
                  else quote_with_backticks(name))
        if obj_type == _DATABASE:
            name_str = q_name
        else:
            q_db = (db if is_quoted_with_backticks(db)
                    else quote_with_backticks(db))

            # Switch the default database to execute the
            # SHOW CREATE statement without needing to specify the database
            # This is for 5.1 compatibility reasons:
            try:
                self.source.exec_query("USE {0}".format(q_db),
                                       self.query_options)
            except UtilError as err:
                raise UtilDBError("ERROR: Couldn't change "
                                  "default database: {0}".format(err.errmsg))
        name_str = q_name

        # Retrieve the CREATE statement.
        row = self.source.exec_query(
            "SHOW CREATE {0} {1}".format(obj_type, name_str)
        )
        create_statement = None
        if row:
            if obj_type == _TABLE or obj_type == _VIEW or \
               obj_type == _DATABASE:
                create_statement = row[0][1]
            elif obj_type == _EVENT:
                create_statement = row[0][3]
            else:
                create_statement = row[0][2]

        # Remove all table options from the CREATE statement (if requested).
        if self.skip_table_opts and obj_type == _TABLE:
            # First, get partition options.
            create_tbl, sep, part_opts = create_statement.rpartition('\n/*')
            # Handle situation where no partition options are found.
            if not create_tbl:
                create_tbl = part_opts
                part_opts = ''
            else:
                part_opts = "{0}{1}".format(sep, part_opts)
            # Then, separate table definitions from table options.
            create_tbl, sep, _ = create_tbl.rpartition(') ')
            # Reconstruct CREATE statement without table options.
            create_statement = "{0}{1}{2}".format(create_tbl, sep, part_opts)

        return create_statement

    def get_create_table(self, db, table):
        """Return the create table statement for the given table.

        This method returns the CREATE TABLE statement for the given table with
        or without the table options, according to the Database object
        property 'skip_table_opts'.

        db[in]             Database name.
        table[in]          Table name.

        Returns a tuple with the CREATE TABLE statement and table options
        (or None). If skip_table_opts=True the CREATE statement does not
        include the table options that are returned separately, otherwise the
        table options are included in the CREATE statement and None is returned
        as the second tuple element.
        """
        # Quote database and table name with backticks.
        q_table = (table if is_quoted_with_backticks(table)
                   else quote_with_backticks(table))
        q_db = db if is_quoted_with_backticks(db) else quote_with_backticks(db)

        # Retrieve CREATE TABLE.
        try:
            row = self.source.exec_query(
                "SHOW CREATE TABLE {0}.{1}".format(q_db, q_table)
            )
            create_tbl = row[0][1]
        except UtilError as err:
            raise UtilDBError("Error retrieving CREATE TABLE for {0}.{1}: "
                              "{2}".format(q_db, q_table, err.errmsg))

        # Separate table options from table definition.
        tbl_opts = None
        if self.skip_table_opts:
            # First, get partition options.
            create_tbl, sep, part_opts = create_tbl.rpartition('\n/*')
            # Handle situation where no partition options are found.
            if not create_tbl:
                create_tbl = part_opts
                part_opts = ''
            else:
                part_opts = "{0}{1}".format(sep, part_opts)
            # Then, separate table definitions from table options.
            create_tbl, sep, tbl_opts = create_tbl.rpartition(') ')
            # Reconstruct CREATE TABLE without table options.
            create_tbl = "{0}{1}{2}".format(create_tbl, sep, part_opts)

        return create_tbl, tbl_opts

    def get_table_options(self, db, table):
        """Return the table options.

        This method returns the list of used table options (from the CREATE
        TABLE statement).

        db[in]             Database name.
        table[in]          Table name.

        Returns a list of table options.
        For example: ['AUTO_INCREMENT=5','ENGINE=InnoDB']
        """
        # Quote database and table name with backticks.
        q_table = (table if is_quoted_with_backticks(table)
                   else quote_with_backticks(table))
        q_db = db if is_quoted_with_backticks(db) else quote_with_backticks(db)

        # Retrieve CREATE TABLE statement.
        try:
            row = self.source.exec_query(
                "SHOW CREATE TABLE {0}.{1}".format(q_db, q_table)
            )
            create_tbl = row[0][1]
        except UtilError as err:
            raise UtilDBError("Error retrieving CREATE TABLE for {0}.{1}: "
                              "{2}".format(q_db, q_table, err.errmsg))

        # First, separate partition options.
        create_tbl, _, part_opts = create_tbl.rpartition('\n/*')
        # Handle situation where no partition options are found.
        create_tbl = part_opts if not create_tbl else create_tbl
        # Then, separate table options from table definition.
        create_tbl, _, tbl_opts = create_tbl.rpartition(') ')
        table_options = tbl_opts.split()

        return table_options

    def get_object_definition(self, db, name, obj_type):
        """Return a list of the object's creation metadata.

        This method queries the INFORMATION_SCHEMA or MYSQL database for the
        row-based (list) description of the object. This is similar to the
        output EXPLAIN <object>.

        db[in]             Database name
        name[in]           Name of the object
        obj_type[in]       Object type (string) e.g. DATABASE
                           Note: this is used to form the correct SHOW command

        Returns list - object definition, None if db.object does not exist
        """
        definition = []
        from_name = None
        condition = None

        # Remove objects backticks if needed
        db = remove_backtick_quoting(db) \
            if is_quoted_with_backticks(db) else db
        name = remove_backtick_quoting(name) \
            if is_quoted_with_backticks(name) else name

        if obj_type == _DATABASE:
            columns = 'SCHEMA_NAME, DEFAULT_CHARACTER_SET_NAME, ' + \
                      'DEFAULT_COLLATION_NAME, SQL_PATH'
            from_name = 'SCHEMATA'
            condition = "SCHEMA_NAME = '%s'" % name
        elif obj_type == _TABLE:
            columns = 'TABLE_SCHEMA, TABLE_NAME, ENGINE, AUTO_INCREMENT, ' + \
                      'AVG_ROW_LENGTH, CHECKSUM, TABLE_COLLATION, ' + \
                      'TABLE_COMMENT, ROW_FORMAT, CREATE_OPTIONS'
            from_name = 'TABLES'
            condition = "TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s'" % \
                        (db, name)
        elif obj_type == _VIEW:
            columns = 'TABLE_SCHEMA, TABLE_NAME, VIEW_DEFINITION, ' + \
                      'CHECK_OPTION, DEFINER, SECURITY_TYPE'
            from_name = 'VIEWS'
            condition = "TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s'" % \
                        (db, name)
        elif obj_type == _TRIG:
            columns = 'TRIGGER_SCHEMA, TRIGGER_NAME, EVENT_MANIPULATION, ' + \
                      'EVENT_OBJECT_TABLE, ACTION_STATEMENT, ' + \
                      'ACTION_TIMING, DEFINER'
            from_name = 'TRIGGERS'
            condition = "TRIGGER_SCHEMA = '%s' AND TRIGGER_NAME = '%s'" % \
                        (db, name)
        elif obj_type == _PROC or obj_type == _FUNC:
            columns = 'ROUTINE_SCHEMA, ROUTINE_NAME, ROUTINE_DEFINITION, ' + \
                      'ROUTINES.SQL_DATA_ACCESS, ROUTINES.SECURITY_TYPE, ' + \
                      'ROUTINE_COMMENT, ROUTINES.DEFINER, param_list, ' + \
                      'DTD_IDENTIFIER, ROUTINES.IS_DETERMINISTIC'
            from_name = 'ROUTINES JOIN mysql.proc ON ' + \
                        'ROUTINES.ROUTINE_SCHEMA = proc.db AND ' + \
                        'ROUTINES.ROUTINE_NAME = proc.name AND ' + \
                        'ROUTINES.ROUTINE_TYPE = proc.type '
            condition = "ROUTINE_SCHEMA = '%s' AND ROUTINE_NAME = '%s'" % \
                        (db, name)
            if obj_type == _PROC:
                typ = 'PROCEDURE'
            else:
                typ = 'FUNCTION'
            condition += " AND ROUTINE_TYPE = '%s'" % typ
        elif obj_type == _EVENT:
            columns = ('EVENT_SCHEMA, EVENT_NAME, DEFINER, EVENT_DEFINITION, '
                       'EVENT_TYPE, INTERVAL_FIELD, INTERVAL_VALUE, STATUS, '
                       'ON_COMPLETION, STARTS, ENDS')
            from_name = 'EVENTS'
            condition = "EVENT_SCHEMA = '%s' AND EVENT_NAME = '%s'" % \
                        (db, name)

        if from_name is None:
            raise UtilError('Attempting to get definition from unknown object '
                            'type = %s.' % obj_type)

        values = {
            'columns': columns,
            'table_name': from_name,
            'conditions': condition,
        }
        rows = self.source.exec_query(_DEFINITION_QUERY % values)
        if rows != []:
            # If this is a table, we need three types of information:
            # basic info, column info, and partitions info
            if obj_type == _TABLE:
                values['name'] = name
                values['db'] = db
                basic_def = rows[0]
                col_def = self.source.exec_query(_COLUMN_QUERY % values)
                part_def = self.source.exec_query(_PARTITION_QUERY % values)
                definition.append((basic_def, col_def, part_def))
            else:
                definition.append(rows[0])

        return definition

    def get_next_object(self):
        """Retrieve the next object in the database list.

        This method is an iterator for retrieving the objects in the database
        as specified in the init() method. You must call this method first.

        Returns next object in list or throws exception at EOL.
        """

        # Must call init() first!
        # Guard for init() prerequisite
        assert self.init_called, "You must call db.init() before db.copy()."

        for obj in self.objects:
            yield obj

    def __build_exclude_patterns(self, exclude_param):
        """Return a string to add to where clause to exclude objects.

        This method will add the conditions to exclude objects based on
        name if there is a dot notation or by a search pattern as specified
        by the options.

        exclude_param[in]  Name of column to check.

        Returns (string) String to add to where clause or ""
        """
        oper = 'NOT REGEXP' if self.use_regexp else 'NOT LIKE'
        string = ""
        for pattern in self.exclude_patterns:
            # Check use of qualified object names (with backtick support).
            if pattern.find(".") > 0:
                use_backtick = is_quoted_with_backticks(pattern)
                db, name = Database.parse_object_name(pattern)
                if use_backtick:
                    # Remove backtick quotes.
                    db = remove_backtick_quoting(db)
                    name = remove_backtick_quoting(name)
                if db == self.db_name:  # Check if database name matches.
                    value = name  # Only use the object name to exclude.
                else:
                    value = pattern
            # Otherwise directly use the specified pattern.
            else:
                value = pattern
            if value:
                # Append exclude condition to previous one(s).
                string = "{0} AND {1} {2} {3}".format(string, exclude_param,
                                                      oper, obj2sql(value))

        return string

    def get_object_type(self, object_name):
        """Return the object type of an object

        This method attempts to locate the object name among the objects
        in the database. It returns the object type if found or None
        if not found.
        Note: different types of objects with the same name might exist in the
        database.

        object_name[in]    Name of the object to find

        Returns (list of strings) with the object types or None if not found
        """
        object_types = None

        # Remove object backticks if needed
        obj_name = remove_backtick_quoting(object_name) \
            if is_quoted_with_backticks(object_name) else object_name

        res = self.source.exec_query(_OBJTYPE_QUERY %
                                     {'db_name': self.db_name,
                                      'obj_name': obj_name})

        if res:
            object_types = ['TABLE' if row[0] == 'BASE TABLE' else row[0]
                            for row in res]

        return object_types

    def get_db_objects(self, obj_type, columns='names', get_columns=False,
                       need_backtick=False):
        """Return a result set containing a list of objects for a given
        database based on type.

        This method returns either a list of names for the object type
        specified, a brief list of minimal columns for creating the
        objects, or the full list of columns from INFORMATION_SCHEMA. It can
        also provide the list of column names if desired.

        obj_type[in]       Type of object to retrieve
        columns[in]        Column mode - names (default), brief, or full
                           Note: not valid for GRANT objects.
        get_columns[in]    If True, return column names as first element
                           and result set as second element. If False,
                           return only the result set.
        need_backtick[in]  If True, it returns any identifiers, e.g. table and
                           column names, quoted with backticks.
                           By default, False.

        TODO: Change implementation to return classes instead of a result set.

        Returns mysql.connector result set
        """

        exclude_param = ""
        if obj_type == _TABLE:
            _NAMES = """
            SELECT DISTINCT TABLES.TABLE_NAME
            """
            names_pos_to_quote = (0,)
            _FULL = """
            SELECT TABLES.TABLE_CATALOG, TABLES.TABLE_SCHEMA,
                TABLES.TABLE_NAME, TABLES.TABLE_TYPE,
                TABLES.ENGINE, TABLES.VERSION, TABLES.ROW_FORMAT,
                TABLES.TABLE_ROWS, TABLES.AVG_ROW_LENGTH, TABLES.DATA_LENGTH,
                TABLES.MAX_DATA_LENGTH, TABLES.INDEX_LENGTH, TABLES.DATA_FREE,
                TABLES.AUTO_INCREMENT, TABLES.CREATE_TIME, TABLES.UPDATE_TIME,
                TABLES.CHECK_TIME, TABLES.TABLE_COLLATION, TABLES.CHECKSUM,
                TABLES.CREATE_OPTIONS, TABLES.TABLE_COMMENT,
                COLUMNS.ORDINAL_POSITION, COLUMNS.COLUMN_NAME,
                COLUMNS.COLUMN_TYPE, COLUMNS.IS_NULLABLE,
                COLUMNS.COLUMN_DEFAULT, COLUMNS.COLUMN_KEY,
                REFERENTIAL_CONSTRAINTS.CONSTRAINT_NAME,
                REFERENTIAL_CONSTRAINTS.REFERENCED_TABLE_NAME,
                REFERENTIAL_CONSTRAINTS.UNIQUE_CONSTRAINT_NAME,
                REFERENTIAL_CONSTRAINTS.UNIQUE_CONSTRAINT_SCHEMA,
                REFERENTIAL_CONSTRAINTS.UPDATE_RULE,
                REFERENTIAL_CONSTRAINTS.DELETE_RULE,
                KEY_COLUMN_USAGE.CONSTRAINT_NAME,
                KEY_COLUMN_USAGE.COLUMN_NAME AS COL_NAME,
                KEY_COLUMN_USAGE.REFERENCED_TABLE_SCHEMA,
                KEY_COLUMN_USAGE.REFERENCED_COLUMN_NAME
            """
            full_pos_to_quote = (1, 2, 22, 27, 28, 29, 30, 33, 34, 35, 36)
            _MINIMAL = """
            SELECT TABLES.TABLE_SCHEMA, TABLES.TABLE_NAME, TABLES.ENGINE,
                COLUMNS.ORDINAL_POSITION, COLUMNS.COLUMN_NAME,
                COLUMNS.COLUMN_TYPE, COLUMNS.IS_NULLABLE,
                COLUMNS.COLUMN_DEFAULT, COLUMNS.COLUMN_KEY,
                TABLES.TABLE_COLLATION,
                TABLES.CREATE_OPTIONS,
                REFERENTIAL_CONSTRAINTS.CONSTRAINT_NAME,
                REFERENTIAL_CONSTRAINTS.REFERENCED_TABLE_NAME,
                REFERENTIAL_CONSTRAINTS.UNIQUE_CONSTRAINT_NAME,
                REFERENTIAL_CONSTRAINTS.UPDATE_RULE,
                REFERENTIAL_CONSTRAINTS.DELETE_RULE,
                KEY_COLUMN_USAGE.CONSTRAINT_NAME,
                KEY_COLUMN_USAGE.COLUMN_NAME AS COL_NAME,
                KEY_COLUMN_USAGE.REFERENCED_TABLE_SCHEMA,
                KEY_COLUMN_USAGE.REFERENCED_COLUMN_NAME
            """
            minimal_pos_to_quote = (0, 1, 4, 11, 12, 13, 16, 17, 18, 19)
            _OBJECT_QUERY = """
            FROM INFORMATION_SCHEMA.TABLES JOIN INFORMATION_SCHEMA.COLUMNS ON
                TABLES.TABLE_SCHEMA = COLUMNS.TABLE_SCHEMA AND
                TABLES.TABLE_NAME = COLUMNS.TABLE_NAME
            LEFT JOIN INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS ON
                TABLES.TABLE_SCHEMA = REFERENTIAL_CONSTRAINTS.CONSTRAINT_SCHEMA
                AND
                TABLES.TABLE_NAME = REFERENTIAL_CONSTRAINTS.TABLE_NAME
            LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE ON
                TABLES.TABLE_SCHEMA = KEY_COLUMN_USAGE.CONSTRAINT_SCHEMA
                AND
                TABLES.TABLE_NAME = KEY_COLUMN_USAGE.TABLE_NAME
            WHERE TABLES.TABLE_SCHEMA = '%s' AND TABLE_TYPE <> 'VIEW' %s
            GROUP BY TABLES.TABLE_SCHEMA, TABLES.TABLE_NAME,
                     COLUMNS.ORDINAL_POSITION
            ORDER BY TABLES.TABLE_SCHEMA, TABLES.TABLE_NAME,
                     COLUMNS.ORDINAL_POSITION
            """
            exclude_param = "TABLES.TABLE_NAME"

        elif obj_type == _VIEW:
            _NAMES = """
            SELECT TABLE_NAME
            """
            names_pos_to_quote = (0,)
            _FULL = """
            SELECT TABLE_CATALOG, TABLE_SCHEMA, TABLE_NAME, VIEW_DEFINITION,
                   CHECK_OPTION, IS_UPDATABLE, DEFINER, SECURITY_TYPE,
                   CHARACTER_SET_CLIENT, COLLATION_CONNECTION
            """
            full_pos_to_quote = (1, 2)
            _MINIMAL = """
            SELECT TABLE_SCHEMA, TABLE_NAME, DEFINER, SECURITY_TYPE,
                   VIEW_DEFINITION, CHECK_OPTION, IS_UPDATABLE,
                   CHARACTER_SET_CLIENT, COLLATION_CONNECTION
            """
            minimal_pos_to_quote = (0, 1)
            _OBJECT_QUERY = """
            FROM INFORMATION_SCHEMA.VIEWS
            WHERE TABLE_SCHEMA = '%s' %s
            """
            exclude_param = "VIEWS.TABLE_NAME"
        elif obj_type == _TRIG:
            _NAMES = """
            SELECT TRIGGER_NAME
            """
            names_pos_to_quote = (0,)
            _FULL = """
            SELECT TRIGGER_CATALOG, TRIGGER_SCHEMA, TRIGGER_NAME,
                   EVENT_MANIPULATION, EVENT_OBJECT_CATALOG,
                   EVENT_OBJECT_SCHEMA, EVENT_OBJECT_TABLE, ACTION_ORDER,
                   ACTION_CONDITION, ACTION_STATEMENT, ACTION_ORIENTATION,
                   ACTION_TIMING, ACTION_REFERENCE_OLD_TABLE,
                   ACTION_REFERENCE_NEW_TABLE, ACTION_REFERENCE_OLD_ROW,
                   ACTION_REFERENCE_NEW_ROW, CREATED, SQL_MODE, DEFINER,
                   CHARACTER_SET_CLIENT, COLLATION_CONNECTION,
                   DATABASE_COLLATION
            """
            full_pos_to_quote = (1, 2, 5, 6)  # 9 ?
            _MINIMAL = """
            SELECT TRIGGER_NAME, DEFINER, EVENT_MANIPULATION,
                   EVENT_OBJECT_SCHEMA, EVENT_OBJECT_TABLE,
                   ACTION_ORIENTATION, ACTION_TIMING,
                   ACTION_STATEMENT, SQL_MODE,
                   CHARACTER_SET_CLIENT, COLLATION_CONNECTION,
                   DATABASE_COLLATION
            """
            # Note: 7 (ACTION_STATEMENT) might require special handling
            minimal_pos_to_quote = (0, 3, 4)
            _OBJECT_QUERY = """
            FROM INFORMATION_SCHEMA.TRIGGERS
            WHERE TRIGGER_SCHEMA = '%s' %s
            """
            exclude_param = "TRIGGERS.TRIGGER_NAME"
        elif obj_type == _PROC:
            _NAMES = """
            SELECT NAME
            """
            names_pos_to_quote = (0,)
            _FULL = """
            SELECT DB, NAME, TYPE, SPECIFIC_NAME, LANGUAGE, SQL_DATA_ACCESS,
                   IS_DETERMINISTIC, SECURITY_TYPE, PARAM_LIST, RETURNS, BODY,
                   DEFINER, CREATED, MODIFIED, SQL_MODE, COMMENT,
                   CHARACTER_SET_CLIENT, COLLATION_CONNECTION, DB_COLLATION,
                   BODY_UTF8
            """
            full_pos_to_quote = (0, 1, 3)
            _MINIMAL = """
            SELECT NAME, LANGUAGE, SQL_DATA_ACCESS, IS_DETERMINISTIC,
                   SECURITY_TYPE, DEFINER, PARAM_LIST, RETURNS,
                   BODY, SQL_MODE,
                   CHARACTER_SET_CLIENT, COLLATION_CONNECTION,
                   DB_COLLATION
            """
            minimal_pos_to_quote = (0,)
            _OBJECT_QUERY = """
            FROM mysql.proc
            WHERE DB = '%s' AND TYPE = 'PROCEDURE' %s
            """
            exclude_param = "NAME"
        elif obj_type == _FUNC:
            _NAMES = """
            SELECT NAME
            """
            names_pos_to_quote = (0,)
            _FULL = """
            SELECT DB, NAME, TYPE, SPECIFIC_NAME, LANGUAGE, SQL_DATA_ACCESS,
                   IS_DETERMINISTIC, SECURITY_TYPE, PARAM_LIST, RETURNS, BODY,
                   DEFINER, CREATED, MODIFIED, SQL_MODE, COMMENT,
                   CHARACTER_SET_CLIENT, COLLATION_CONNECTION, DB_COLLATION,
                   BODY_UTF8
            """
            full_pos_to_quote = (0, 1, 3)
            _MINIMAL = """
            SELECT NAME, LANGUAGE, SQL_DATA_ACCESS, IS_DETERMINISTIC,
                   SECURITY_TYPE, DEFINER, PARAM_LIST, RETURNS,
                   BODY, SQL_MODE,
                   CHARACTER_SET_CLIENT, COLLATION_CONNECTION,
                   DB_COLLATION
            """
            minimal_pos_to_quote = (0,)
            _OBJECT_QUERY = """
            FROM mysql.proc
            WHERE DB = '%s' AND TYPE = 'FUNCTION' %s
            """
            exclude_param = "NAME"
        elif obj_type == _EVENT:
            _NAMES = """
            SELECT NAME
            """
            names_pos_to_quote = (0,)
            _FULL = """
            SELECT DB, NAME, BODY, DEFINER, EXECUTE_AT, INTERVAL_VALUE,
                   INTERVAL_FIELD, CREATED, MODIFIED, LAST_EXECUTED, STARTS,
                   ENDS, STATUS, ON_COMPLETION, SQL_MODE, COMMENT, ORIGINATOR,
                   TIME_ZONE, CHARACTER_SET_CLIENT, COLLATION_CONNECTION,
                   DB_COLLATION, BODY_UTF8
            """
            full_pos_to_quote = (0, 1)
            _MINIMAL = """
            SELECT NAME, DEFINER, BODY, STATUS,
                   EXECUTE_AT, INTERVAL_VALUE, INTERVAL_FIELD, SQL_MODE,
                   STARTS, ENDS, STATUS, ON_COMPLETION, ORIGINATOR,
                   CHARACTER_SET_CLIENT, COLLATION_CONNECTION,
                   DB_COLLATION
            """
            minimal_pos_to_quote = (0,)
            _OBJECT_QUERY = """
            FROM mysql.event
            WHERE DB = '%s' %s
            """
            exclude_param = "NAME"
        elif obj_type == _GRANT:
            _OBJECT_QUERY = """
            (
                SELECT GRANTEE, PRIVILEGE_TYPE, TABLE_SCHEMA,
                       NULL as TABLE_NAME, NULL AS COLUMN_NAME,
                       NULL AS ROUTINE_NAME
                FROM INFORMATION_SCHEMA.SCHEMA_PRIVILEGES
                WHERE table_schema = '%s'
            ) UNION (
                SELECT grantee, privilege_type, table_schema, table_name,
                       NULL, NULL
                FROM INFORMATION_SCHEMA.TABLE_PRIVILEGES
                WHERE table_schema = '%s'
            ) UNION (
                SELECT grantee, privilege_type, table_schema, table_name,
                       column_name, NULL
                FROM INFORMATION_SCHEMA.COLUMN_PRIVILEGES
                WHERE table_schema = '%s'
            ) UNION (
                SELECT CONCAT('''', User, '''@''', Host, ''''),  Proc_priv, Db,
                       Routine_name, NULL, Routine_type
                FROM mysql.procs_priv WHERE Db = '%s'
            ) ORDER BY GRANTEE ASC, PRIVILEGE_TYPE ASC, TABLE_SCHEMA ASC,
                       TABLE_NAME ASC, COLUMN_NAME ASC, ROUTINE_NAME ASC
            """
        else:
            return None

        col_options = {
            'columns': get_columns
        }
        pos_to_quote = ()
        if obj_type == _GRANT:
            query = _OBJECT_QUERY % (self.db_name, self.db_name,
                                     self.db_name, self.db_name)
            return self.source.exec_query(query, col_options)
        else:
            if columns == "names":
                prefix = _NAMES
                if need_backtick:
                    pos_to_quote = names_pos_to_quote
            elif columns == "full":
                prefix = _FULL
                if need_backtick:
                    pos_to_quote = full_pos_to_quote
            else:
                prefix = _MINIMAL
                if need_backtick:
                    pos_to_quote = minimal_pos_to_quote
            # Form exclusion string
            exclude_str = ""
            if self.exclude_patterns:
                exclude_str = self.__build_exclude_patterns(exclude_param)
            query = prefix + _OBJECT_QUERY % (self.db_name, exclude_str)
            res = self.source.exec_query(query, col_options)

            # Quote required identifiers with backticks
            if need_backtick:
                # function to quote row elements at a given positions
                # quote = lambda pos, obj: quote_with_backticks(obj) \
                #         if obj and pos in pos_to_quote else obj
                new_rows = []
                for row in res[1]:
                    # recreate row tuple quoting needed elements with backticks
                    #r = tuple([quote(i, data) for i, data in enumerate(row)])
                    r = tuple([quote_with_backticks(data)
                               if data and i in pos_to_quote else data
                               for i, data in enumerate(row)])
                    new_rows.append(r)

                # set new result with with required data quoted with backticks
                res = (res[0], new_rows)

            return res

    def _check_user_permissions(self, uname, host, access):
        """Check user permissions for a given privilege

        uname[in]          user name to check
        host[in]           host name of connection
        acess[in]          privilege to check (e.g. "SELECT")

        Returns True if user has permission, False if not
        """
        user = User(self.source, uname + '@' + host)
        result = user.has_privilege(access[0], '*', access[1])
        return result

    def check_read_access(self, user, host, options):
        """Check access levels for reading database objects

        This method will check the user's permission levels for copying a
        database from this server.

        It will also skip specific checks if certain objects are not being
        copied (i.e., views, procs, funcs, grants).

        user[in]           user name to check
        host[in]           host name to check
        options[in]        dictionary of values to include:
            skip_views     True = no views processed
            skip_proc      True = no procedures processed
            skip_func      True = no functions processed
            skip_grants    True = no grants processed
            skip_events    True = no events processed

        Returns True if user has permissions and raises a UtilDBError if the
                     user does not have permission with a message that includes
                     the server context.
        """

        # Build minimal list of privileges for source access
        source_privs = []
        priv_tuple = (self.db_name, "SELECT")
        source_privs.append(priv_tuple)
        # if views are included, we need SHOW VIEW
        if not options.get('skip_views', False):
            priv_tuple = (self.db_name, "SHOW VIEW")
            source_privs.append(priv_tuple)
        # if procs or funcs are included, we need read on mysql db
        if not options.get('skip_proc', False) or \
           not options.get('skip_func', False):
            priv_tuple = ("mysql", "SELECT")
            source_privs.append(priv_tuple)
        # if events, we need event
        if not options.get('skip_events', False):
            priv_tuple = (self.db_name, "EVENT")
            source_privs.append(priv_tuple)

        # Check permissions on source
        for priv in source_privs:
            if not self._check_user_permissions(user, host, priv):
                raise UtilDBError("User %s on the %s server does not have "
                                  "permissions to read all objects in %s. " %
                                  (user, self.source.role, self.db_name) +
                                  "User needs %s privilege on %s." %
                                  (priv[1], priv[0]), -1, priv[0])

        return True

    def check_write_access(self, user, host, options):
        """Check access levels for creating and writing database objects

        This method will check the user's permission levels for copying a
        database to this server.

        It will also skip specific checks if certain objects are not being
        copied (i.e., views, procs, funcs, grants).

        user[in]           user name to check
        host[in]           host name to check
        options[in]        dictionary of values to include:
            skip_views     True = no views processed
            skip_proc      True = no procedures processed
            skip_func      True = no functions processed
            skip_grants    True = no grants processed
            skip_events    True = no events processed


        Returns True if user has permissions and raises a UtilDBError if the
                     user does not have permission with a message that includes
                     the server context.
        """

        dest_privs = [(self.db_name, "CREATE"),
                      (self.db_name, "SUPER"),
                      ("*", "SUPER")]
        if not options.get('skip_grants', False):
            priv_tuple = (self.db_name, "WITH GRANT OPTION")
            dest_privs.append(priv_tuple)

        # Check privileges on destination
        for priv in dest_privs:
            if not self._check_user_permissions(user, host, priv):
                raise UtilDBError("User %s on the %s server does not "
                                  "have permissions to create all objects "
                                  "in %s. User needs %s privilege on %s." %
                                  (user, self.source.role, priv[0], priv[1],
                                   priv[0]), -1, priv[0])

        return True

    @staticmethod
    def parse_object_name(qualified_name):
        """Parse db, name from db.name

        qualified_name[in] MySQL object string (e.g. db.table)

        Returns tuple containing name split
        """

        # Split the qualified name considering backtick quotes
        parts = re.match(REGEXP_QUALIFIED_OBJ_NAME, qualified_name)
        if parts:
            return parts.groups()
        else:
            return (None, None)

## frm_reader.py
#
# Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#

"""
This file contains a module to read .frm files and attempt to create a
facsimile of the CREATE TABLE command.
"""

import bisect
import os
import stat
import struct
import time

from mysql.utilities.common.charsets import CharsetInfo
from mysql.utilities.exception import UtilError
from pprint import pprint


#
# Definitions and types for interpreting the .frm file values.
#

# Misc. constants
_PORTABLE_SIZEOF_CHAR_PTR = 8
_MY_CHARSET_BIN_NUM = 63
_HA_NOSAME = 1
_DIG2BYTES = [0, 1, 1, 2, 2, 3, 3, 4, 4, 4]
_DIG_PER_DEC1 = 9
_HEADER_LEN = 64
_TABLE_TYPE = 0x01fe   # Magic number for table .frm files
_VIEW_TYPE = 0x5954    # Magic number for view .frm files
_FIELD_NR_MASK = 16383
_HA_USES_COMMENT = 4096

# MySQL data type definitions
_MYSQL_TYPE_DECIMAL = 0
_MYSQL_TYPE_TINY = 1
_MYSQL_TYPE_SHORT = 2
_MYSQL_TYPE_LONG = 3
_MYSQL_TYPE_FLOAT = 4
_MYSQL_TYPE_DOUBLE = 5
_MYSQL_TYPE_NULL = 6
_MYSQL_TYPE_TIMESTAMP = 7
_MYSQL_TYPE_LONGLONG = 8
_MYSQL_TYPE_INT24 = 9
_MYSQL_TYPE_DATE = 10
_MYSQL_TYPE_TIME = 11
_MYSQL_TYPE_DATETIME = 12
_MYSQL_TYPE_YEAR = 13
_MYSQL_TYPE_NEWDATE = 14
_MYSQL_TYPE_VARCHAR = 15
_MYSQL_TYPE_BIT = 16
_MYSQL_TYPE_TIMESTAMP2 = 17
_MYSQL_TYPE_DATETIME2 = 18
_MYSQL_TYPE_TIME2 = 19
_MYSQL_TYPE_NEWDECIMAL = 246
_MYSQL_TYPE_ENUM = 247
_MYSQL_TYPE_SET = 248
_MYSQL_TYPE_TINY_BLOB = 249
_MYSQL_TYPE_MEDIUM_BLOB = 250
_MYSQL_TYPE_LONG_BLOB = 251
_MYSQL_TYPE_BLOB = 252
_MYSQL_TYPE_VAR_STRING = 253
_MYSQL_TYPE_STRING = 254
_MYSQL_TYPE_GEOMETRY = 255

# Mapping of field data types to data type names
_col_types = [
    {'value': _MYSQL_TYPE_DECIMAL, 'text': 'decimal', 'size': None},
    {'value': _MYSQL_TYPE_TINY, 'text': 'tinyint', 'size': 1},
    {'value': _MYSQL_TYPE_SHORT, 'text': 'smallint', 'size': 2},
    {'value': _MYSQL_TYPE_LONG, 'text': 'int', 'size': 4},
    {'value': _MYSQL_TYPE_FLOAT, 'text': 'float', 'size': 4},
    {'value': _MYSQL_TYPE_DOUBLE, 'text': 'double', 'size': 8},
    {'value': _MYSQL_TYPE_NULL, 'text': 'NULL', 'size': 0},
    {'value': _MYSQL_TYPE_TIMESTAMP, 'text': 'timestamp', 'size': 4},
    {'value': _MYSQL_TYPE_LONGLONG, 'text': 'bigint', 'size': 8},
    {'value': _MYSQL_TYPE_INT24, 'text': 'mediumint', 'size': 3},
    {'value': _MYSQL_TYPE_DATE, 'text': 'date', 'size': 4},
    {'value': _MYSQL_TYPE_TIME, 'text': 'time', 'size': 3},
    {'value': _MYSQL_TYPE_DATETIME, 'text': 'datetime', 'size': 8},
    {'value': _MYSQL_TYPE_YEAR, 'text': 'year', 'size': 1},
    {'value': _MYSQL_TYPE_NEWDATE, 'text': 'date', 'size': 3},
    # Size must be calculated
    {'value': _MYSQL_TYPE_VARCHAR, 'text': 'varchar', 'size': -1},
    # Size must be calculated
    {'value': _MYSQL_TYPE_BIT, 'text': 'bit', 'size': -2},
    {'value': _MYSQL_TYPE_TIMESTAMP2, 'text': 'timestamp', 'size': 4},
    {'value': _MYSQL_TYPE_DATETIME2, 'text': 'datetime', 'size': 8},
    {'value': _MYSQL_TYPE_TIME2, 'text': 'time', 'size': 3},
    {'value': _MYSQL_TYPE_NEWDECIMAL, 'text': 'decimal', 'size': None},
    {'value': _MYSQL_TYPE_ENUM, 'text': 'enum', 'size': 0},
    {'value': _MYSQL_TYPE_SET, 'text': 'set', 'size': 0},
    {'value': _MYSQL_TYPE_TINY_BLOB, 'text': 'tinyblob',
     'size': 1 + _PORTABLE_SIZEOF_CHAR_PTR},
    {'value': _MYSQL_TYPE_MEDIUM_BLOB, 'text': 'mediumblob',
     'size': 3 + _PORTABLE_SIZEOF_CHAR_PTR},
    {'value': _MYSQL_TYPE_LONG_BLOB, 'text': 'longblob',
     'size': 4 + _PORTABLE_SIZEOF_CHAR_PTR},
    {'value': _MYSQL_TYPE_BLOB, 'text': 'blob',
     'size': 2 + _PORTABLE_SIZEOF_CHAR_PTR},
    # Size must be calculated
    {'value': _MYSQL_TYPE_VAR_STRING, 'text': 'varchar', 'size': -1},
    {'value': _MYSQL_TYPE_STRING, 'text': 'char', 'size': None},
    {'value': _MYSQL_TYPE_GEOMETRY, 'text': 'geometry',
     'size': 4 + _PORTABLE_SIZEOF_CHAR_PTR},
]

_col_keys = [item['value'] for item in _col_types]

# Database/engine type definitions
_DB_TYPE_UNKNOWN = 0
_DB_TYPE_DIAB_ISAM = 1
_DB_TYPE_HASH = 2
_DB_TYPE_MISAM = 3
_DB_TYPE_PISAM = 4
_DB_TYPE_RMS_ISAM = 5
_DB_TYPE_HEAP = 6
_DB_TYPE_ISAM = 7
_DB_TYPE_MRG_ISAM = 8
_DB_TYPE_MYISAM = 9
_DB_TYPE_MRG_MYISAM = 10
_DB_TYPE_BERKELEY_DB = 11
_DB_TYPE_INNODB = 12
_DB_TYPE_GEMINI = 13
_DB_TYPE_NDBCLUSTER = 14
_DB_TYPE_EXAMPLE_DB = 15
_DB_TYPE_ARCHIVE_DB = 16
_DB_TYPE_CSV_DB = 17
_DB_TYPE_FEDERATED_DB = 18
_DB_TYPE_BLACKHOLE_DB = 19
_DB_TYPE_PARTITION_DB = 20
_DB_TYPE_BINLOG = 21
_DB_TYPE_SOLID = 22
_DB_TYPE_PBXT = 23
_DB_TYPE_TABLE_FUNCTION = 24
_DB_TYPE_MEMCACHE = 25
_DB_TYPE_FALCON = 26
_DB_TYPE_MARIA = 27
_DB_TYPE_PERFORMANCE_SCHEMA = 28
_DB_TYPE_FIRST_DYNAMIC = 42
_DB_TYPE_DEFAULT = 127

# Mapping of engine types to engine names
_engine_types = [
    {'value': _DB_TYPE_UNKNOWN, 'text': 'UNKNOWN'},
    {'value': _DB_TYPE_DIAB_ISAM, 'text': 'ISAM'},
    {'value': _DB_TYPE_HASH, 'text': 'HASH'},
    {'value': _DB_TYPE_MISAM, 'text': 'MISAM'},
    {'value': _DB_TYPE_PISAM, 'text': 'PISAM'},
    {'value': _DB_TYPE_RMS_ISAM, 'text': 'RMS_ISAM'},
    {'value': _DB_TYPE_HEAP, 'text': 'HEAP'},
    {'value': _DB_TYPE_ISAM, 'text': 'ISAM'},
    {'value': _DB_TYPE_MRG_ISAM, 'text': 'MERGE'},
    {'value': _DB_TYPE_MYISAM, 'text': 'MYISAM'},
    {'value': _DB_TYPE_MRG_MYISAM, 'text': 'MERGE'},
    {'value': _DB_TYPE_BERKELEY_DB, 'text': 'BDB'},
    {'value': _DB_TYPE_INNODB, 'text': 'INNODB'},
    {'value': _DB_TYPE_GEMINI, 'text': 'GEMINI'},
    {'value': _DB_TYPE_NDBCLUSTER, 'text': 'NDBCLUSTER'},
    {'value': _DB_TYPE_EXAMPLE_DB, 'text': 'EXAMPLE'},
    {'value': _DB_TYPE_ARCHIVE_DB, 'text': 'ARCHIVE'},
    {'value': _DB_TYPE_CSV_DB, 'text': 'CSV'},
    {'value': _DB_TYPE_FEDERATED_DB, 'text': 'FEDERATED'},
    {'value': _DB_TYPE_BLACKHOLE_DB, 'text': 'BLACKHOLE'},
    {'value': _DB_TYPE_PARTITION_DB, 'text': 'PARTITION'},
    {'value': _DB_TYPE_BINLOG, 'text': 'BINLOG'},
    {'value': _DB_TYPE_SOLID, 'text': 'SOLID'},
    {'value': _DB_TYPE_PBXT, 'text': 'PBXT'},
    {'value': _DB_TYPE_TABLE_FUNCTION, 'text': 'FUNCTION'},
    {'value': _DB_TYPE_MEMCACHE, 'text': 'MEMCACHE'},
    {'value': _DB_TYPE_FALCON, 'text': 'FALCON'},
    {'value': _DB_TYPE_MARIA, 'text': 'MARIA'},
    {'value': _DB_TYPE_PERFORMANCE_SCHEMA, 'text': 'PERFORMANCE_SCHEMA'},
    {'value': _DB_TYPE_FIRST_DYNAMIC, 'text': 'DYNAMIC'},
    {'value': _DB_TYPE_DEFAULT, 'text': 'DEFAULT'},
]
_engine_keys = [item['value'] for item in _engine_types]

# Key algorithms
_KEY_ALG = ['UNDEFINED', 'BTREE', 'RTREE', 'HASH', 'FULLTEXT']

# Format definitions
#                            1         2         3
#                  01234567890123456789012345678901
_HEADER_FORMAT = "<BBBBHHIHHIHHHHHBBIBBBBBIIIIBBBHH"
#                        11122222333333444445556666
#                  12346824602468023489012371590124
#                 ***   111111
#             0123456789012345
_COL_DATA = "<BBBBBBBBBBBBBBBH"
#             0123456789111111
#                       012345

# Various flags copied from server source code - some may not be used but
# may find a use as more esoteric table configurations are tested. These
# are derived from fields.h and all may not apply but are included for
# future expansion/features.
_FIELDFLAG_DECIMAL = 1
_FIELDFLAG_BINARY = 1
_FIELDFLAG_NUMBER = 2
_FIELDFLAG_ZEROFILL = 4
_FIELDFLAG_PACK = 120	              # Bits used for packing
_FIELDFLAG_INTERVAL = 256             # mangled with decimals!
_FIELDFLAG_BITFIELD = 512	          # mangled with decimals!
_FIELDFLAG_BLOB = 1024	              # mangled with decimals!
_FIELDFLAG_GEOM = 2048                # mangled with decimals!
_FIELDFLAG_TREAT_BIT_AS_CHAR = 4096   # use Field_bit_as_char
_FIELDFLAG_LEFT_FULLSCREEN = 8192
_FIELDFLAG_RIGHT_FULLSCREEN = 16384
_FIELDFLAG_FORMAT_NUMBER = 16384      # predit: ###,,## in output
_FIELDFLAG_NO_DEFAULT = 16384         # sql
_FIELDFLAG_SUM = 32768                # predit: +#fieldflag
_FIELDFLAG_MAYBE_NULL = 32768         # sql
_FIELDFLAG_HEX_ESCAPE = 0x10000
_FIELDFLAG_PACK_SHIFT = 3
_FIELDFLAG_DEC_SHIFT = 8
_FIELDFLAG_MAX_DEC = 31
_FIELDFLAG_NUM_SCREEN_TYPE = 0x7F01
_FIELDFLAG_ALFA_SCREEN_TYPE = 0x7800

# Additional flags
_NOT_NULL_FLAG = 1             # Field can't be NULL
_PRI_KEY_FLAG = 2              # Field is part of a primary key
_UNIQUE_KEY_FLAG = 4           # Field is part of a unique key
_MULTIPLE_KEY_FLAG = 8         # Field is part of a key
_BLOB_FLAG = 16                # Field is a blob
_UNSIGNED_FLAG = 32            # Field is unsigned
_HA_PACK_RECORD = 1            # Pack record?
_HA_FULLTEXT = 128             # For full-text search
_HA_SPATIAL = 1024             # For spatial search

# Row type definitions
_ROW_TYPE_DEFAULT, _ROW_TYPE_FIXED, _ROW_TYPE_DYNAMIC, _ROW_TYPE_COMPRESSED, \
    _ROW_TYPE_REDUNDANT, _ROW_TYPE_COMPACT, _ROW_TYPE_PAGE = range(0, 7)

# enum utypes from field.h
_NONE, _DATE, _SHIELD, _NOEMPTY, _CASEUP, _PNR, _BGNR, _PGNR, _YES, _NO, \
    _REL, _CHECK, _EMPTY, _UNKNOWN_FIELD, _CASEDN, _NEXT_NUMBER, \
    _INTERVAL_FIELD, _BIT_FIELD, _TIMESTAMP_OLD_FIELD, _CAPITALIZE, \
    _BLOB_FIELD, _TIMESTAMP_DN_FIELD, _TIMESTAMP_UN_FIELD, \
    _TIMESTAMP_DNUN_FIELD = range(0, 24)

# Array of field data types that can be unsigned
_UNSIGNED_FIELDS = ['TINYINT', 'SMALLINT', 'MEDIUMINT', 'INT', 'INTEGER',
                    'BIGINT', 'REAL', 'DOUBLE', 'FLOAT', 'DECIMAL', 'NUMERIC']

# Array of field data types that can have character set options
_CS_ENABLED = ['CHAR', 'VARCHAR', 'TINYBLOB', 'BLOB', 'MEDIUMBLOB', 'LONGBLOB',
               'ENUM', 'SET']

# Array of index (key) types
_KEY_TYPES = ['PRIMARY', 'UNIQUE', 'MULTIPLE', 'FULLTEXT', 'SPATIAL',
              'FOREIGN_KEY']

# Array of field data types that do not require parens for size
_NO_PARENS = ['TIMESTAMP', 'DATETIME', 'YEAR', 'DATE', 'TIME',
              'TINYBLOB', 'BLOB', 'MEDIUMBLOB', 'LONGBLOB',
              'TINYTEXT', 'TEXT', 'MEDIUMTEXT', 'LONGTEXT']

# Array of field data types that are real data
_REAL_TYPES = ['REAL', 'DOUBLE', 'FLOAT', 'DECIMAL', 'NUMERIC']

# Array of blob data types
_BLOB_TYPES = [_MYSQL_TYPE_TINY_BLOB, _MYSQL_TYPE_MEDIUM_BLOB,
               _MYSQL_TYPE_LONG_BLOB, _MYSQL_TYPE_BLOB,
               _MYSQL_TYPE_GEOMETRY]

# Array of data types that do not use keysize for indexes
_NO_KEYSIZE = ['BIT', 'ENUM', 'SET', 'DECIMAL', 'NUMERIC',
               'TIMESTAMP', 'TIME', 'DATETIME']


def _is_decimal(col):
    """Check for decimal data types
    Returns bool - True if column is decimal or numeric.
    """
    return col['field_type_name'].upper() in ['DECIMAL', 'NUMERIC']


def _is_cs_enabled(col):
    """Check for data types that accept character set option
    Returns bool - True if column supports character set option.
    """
    return col['field_type_name'].upper() in _CS_ENABLED


def _is_unsigned(col):
    """Check for unsigned data types
    Returns bool - True if column is an unsigned type.
    """
    return col['field_type_name'].upper() in _UNSIGNED_FIELDS


def _is_real(col):
    """Check for real data types
    Returns bool - True if column is a real type.
    """
    return col['field_type_name'].upper() in _REAL_TYPES


def _is_no_parens(col):
    """Check for column uses parens for size
    Returns bool - True if column needs parens for size.
    """
    return col['field_type_name'].upper() in _NO_PARENS


def _is_blob(col):
    """Check for blob data types
    Returns bool - True if column is a blob.
    """
    return col['field_type'] in _BLOB_TYPES


def _is_geometry(flags):
    """Check for geometry field types
    Returns bool - True if geometry type.
    """
    print "flags: %0x" % flags
    return (flags & _FIELDFLAG_GEOM) == _FIELDFLAG_GEOM


def _no_keysize(col):
    """Check for data types that do not use keysize
    Returns bool - True if column is to be exluded from keysize.
    """
    return col['field_type_name'].upper() in _NO_KEYSIZE


def _print_default_values(values):
    """Print default values

    The method prints the default values 2 bytes at a time in hexidecimal
    and ASCII representation (similar to hexdump).

    values[in]         Array of default values
    """
    num_bytes = len(values)
    print "# Default values raw data:"
    i = 0
    while (i < num_bytes):
        def_str = ""
        j = 0
        print "#",
        while (j < 8) and (i < num_bytes):
            print "%02x" % ord(values[i]),
            def_str += values[i]
            i += 1
            j += 1
        print "",
        j = 0
        while (j < 8) and (i < num_bytes):
            print "%02x" % ord(values[i]),
            def_str += values[i]
            i += 1
            j += 1
        print " |",
        print def_str


def _get_pack_length(col):
    """Find the pack length for the field

    col[in]        Column data read for the column to operate

    Returns tuple - (pack_length, field_size)
    """
    size = _col_types[bisect.bisect_left(_col_keys,
                                         col['field_type'])]['size']
    if size == -1:
        col_len = col['bytes_in_col']
        return (1 if int(col_len) < 256 else 2), col_len
    if size == -2:
        col_len = col['bytes_in_col']
        return col_len / 8, col_len
    if size is None:
        return size, col['bytes_in_col']  # It's a string of some sort
    return 0, size


def _get_blob_text(col):
    """Form the correct field name string for blobs and text fields

    col[in]        Column data read for the column to operate

    Returns string - field name string
    """
    type_str = ""
    if col['field_type'] == _MYSQL_TYPE_TINY_BLOB:
        type_str = "tiny"
    elif col['field_type'] == _MYSQL_TYPE_MEDIUM_BLOB:
        type_str = "medium"
    elif col['field_type'] == _MYSQL_TYPE_LONG_BLOB:
        type_str = "long"
    if col['charset'] == _MY_CHARSET_BIN_NUM:
        type_str = "".join([type_str, "blob"])
    else:
        type_str = "".join([type_str, "text"])

    return type_str


def _format_default(col, col_flags, length, decimals):
    """Format a defaut value for printing

    col[in]        Column data dictionary
    col_flags[in]  Flags for column
    length[in]     Length of default value or integer part for floats
    decimals[in]   Number of decimal positions for floats

    Returns string - default clause for CREATE statement.
    """
    default = col['default']
    if isinstance(default, str):
        fmt_str = "'%s'"
    # Check for zerofill:
    elif col_flags & _FIELDFLAG_ZEROFILL:
        if _is_real(col):
            if decimals > 0 and decimals < length:
                if col['field_type_name'].upper() == "DECIMAL":
                    length += 1
                fmt_str = "'" + '%0' + "%s" % length + '.' + \
                          "%s" % decimals + 'f' + "'"
            else:
                fmt_str = "'" + '%0' + "%s" % length + '.' + 'f' + "'"
            if float(default) == 0.0:
                fmt_str = "%s"
                default = "NULL"
        else:
            fmt_str = "'" + '%0' + "%s" % length + 'd' + "'"
    else:
        if _is_real(col):
            if decimals > 0 and decimals < length:
                fmt_str = "'" + '%' + "%s" % (length - 1) + '.' + \
                          "%s" % decimals + 'f' + "'"
            elif decimals == 0:
                fmt_str = "'%d'"
                default = divmod(default, 1)[0]
            else:
                i, decm = divmod(default, 1)
                if decm == 0:
                    fmt_str = "'%d'"
                    default = i
                else:
                    fmt_str = "'%f'"
            if float(default) == 0.0:
                fmt_str = "%s"
                default = "NULL"
        else:
            fmt_str = "'%d'"

    return " DEFAULT " + fmt_str % default


class FrmReader(object):
    """
    This class implements an abstract of the .frm file format. It can be used
    to produce a likeness of the CREATE TABLE command. It is not a 100% match
    because some of the components are missing from the .frm file. For
    example, there are no character set or collation definitions stored so
    unless one has access to the server definitions, these cannot be
    determined.

    The class permits the following operations:

    - show_create_table_statement() - read a .frm file and print its CREATE
      statement. Optionally displays statistics for the .frm file.

    """

    def __init__(self, db_name, table, frm_path, options):
        """Constructor

        db[in]             the database (if known)
        table[in]          table name
        frm_path[in]       full path to .frm file
        options[in]        options for controlling behavior:
            verbosity      print extra data during operations (optional)
                           default value = 0
            quiet          suppress output except CREATE statement
                           default False
            server        path to server for server install
                           default None
            new_engine     substitute engine
                           default None
        """
        self.general_data = None
        self.key_data = None
        self.comment_str = None
        self.engine_str = None
        self.partition_str = None
        self.col_metadata = None
        self.column_data = None
        self.num_cols = 0
        self.default_values = None
        self.frm_file = None
        self.verbosity = options.get('verbosity', 0)
        self.quiet = options.get('quiet', False)
        self.server = options.get('server', None)
        self.new_engine = options.get('new_engine', None)
        self.show_stats = options.get("show_stats", False)
        self.db_name = db_name
        self.table = table
        self.frm_path = frm_path
        self.options = options

        if self.server is None:
            self.csi = None
        else:
            self.csi = CharsetInfo(options)

    def _read_header(self):
        """Read the header information from the file
        """
        try:
            # Skip to header position
            if self.verbosity > 1:
                print "# Skipping to header at : %0000x" % 2
            self.frm_file.seek(2, 0)
            data = self.frm_file.read(_HEADER_LEN)
        except Exception, error:
            if self.verbosity > 1:
                print "EXCEPTION:", error
            raise UtilError("Cannot read header.")

        # Read header
        header = struct.unpack(_HEADER_FORMAT, data)
        engine_name = _engine_types[bisect.bisect_left(_engine_keys,
                                                       header[1])]['text']
        self.general_data = {
            'frm_version': header[0],
            'legacy_db_type': engine_name,
            'IO_SIZE': header[4],
            'length': header[6],
            'tmp_key_length': header[7],
            'rec_length': header[8],
            'max_rows': header[10],
            'min_rows': header[11],
            'db_create_pack': header[12] >> 8,  # only want 1 byte
            'key_info_length': header[13],
            'create_options': header[14],
            'frm_file_ver': header[16],
            'avg_row_length': header[17],
            'default_charset': header[18],
            'row_type': header[20],
            'charset_low': header[21],
            'table_charset': (header[21] << 8) + header[18],
            'key_length': header[24],
            'MYSQL_VERSION_ID': header[25],
            'extra_size': header[26],
            'default_part_eng': header[29],
            'key_block_size': header[30],
        }
        # Fix storage engine string if partitioning engine specified
        if self.general_data['default_part_eng'] > 0 and \
           self.new_engine is None:
            self.engine_str = _engine_types[bisect.bisect_left(
                _engine_keys, header[29])]['text']

        return True

    def _read_keys(self):
        """Read key fields from the file
        """
        offset = self.general_data['IO_SIZE']
        try:
            # Skip ahead to key section
            if self.verbosity > 1:
                print "# Skipping to key data at : %0000x" % int(offset)
            self.frm_file.seek(offset, 0)
        except Exception, error:
            if self.verbosity > 1:
                print "EXCEPTION:", error
            raise UtilError("Cannot locate keys.")

        # Decipher key parts
        num_keys = struct.unpack("<B", self.frm_file.read(1))[0]
        if num_keys & 0x80:
            next_byte = struct.unpack("<B", self.frm_file.read(1))[0]
            num_keys = (next_byte << 7) | (num_keys & 0x7f)
            low = struct.unpack("<B", self.frm_file.read(1))[0]
            high = struct.unpack("<B", self.frm_file.read(1))[0]
            num_key_parts = low + (high << 8)
            self.frm_file.read(2)
        else:
            num_key_parts = struct.unpack("<B", self.frm_file.read(1))[0],
            self.frm_file.read(4)

        self.key_data = {
            'num_keys': num_keys,
            'num_key_parts': num_key_parts,
            'key_names': [],
            'keys': [],
        }

        for i in range(0, self.key_data['num_keys']):
            key_info = {
                'flags': struct.unpack("<H", self.frm_file.read(2))[0],
                'key_length': struct.unpack("<H", self.frm_file.read(2))[0],
                'num_parts': struct.unpack("<B", self.frm_file.read(1))[0],
                'algorithm': struct.unpack("<B", self.frm_file.read(1))[0],
                'block_size': struct.unpack("<H", self.frm_file.read(2))[0],
                'key_parts': [],
                'comment': "",
            }
            for j in range(0, key_info['num_parts']):
                if self.verbosity > 1:
                    print "# Reading key part %s." % j
                key_part_info = {
                    'field_num': struct.unpack("<H",
                                               self.frm_file.read(2))[0] &
                    _FIELD_NR_MASK,
                    'offset': struct.unpack("<H",
                                            self.frm_file.read(2))[0] - 1,
                    'key_type': struct.unpack("<H",
                                              self.frm_file.read(2))[0],
                    'key_part_flag': struct.unpack("<B",
                                                   self.frm_file.read(1))[0],
                    'length': struct.unpack("<H",
                                            self.frm_file.read(2))[0],
                }
                key_info['key_parts'].append(key_part_info)
            self.key_data['keys'].append(key_info)

        terminator = struct.unpack("c", self.frm_file.read(1))[0]
        for i in range(0, self.key_data['num_keys']):
            key_name = ""
            # Read until the next 0xff
            char_read = ""
            while char_read != terminator:
                char_read = struct.unpack("c", self.frm_file.read(1))[0]
                if char_read != terminator:
                    key_name += str(char_read)
            self.key_data['key_names'].append(key_name)

        # Now find the key comments!
        self.frm_file.read(1)
        for i in range(0, self.key_data['num_keys']):
            if (self.key_data['keys'][i]['flags'] & _HA_USES_COMMENT) == \
               _HA_USES_COMMENT:
                k_len = struct.unpack("<H", self.frm_file.read(2))[0]
                com_str = struct.unpack("c" * k_len, self.frm_file.read(k_len))
                self.key_data['keys'][i]['comment'] = "".join(com_str)

        return True

    def _read_comment(self):
        """Read the table comments.
        """
        # Fields can be found 1 IO_SIZE more than what has been read to date
        # plus 46 bytes.
        io_size = self.general_data['IO_SIZE']
        record_offset = io_size + self.general_data['tmp_key_length'] + \
            self.general_data['rec_length']
        offset = (((record_offset / io_size) + 1) * io_size) + 46
        try:
            # Skip to column position
            if self.verbosity > 1:
                print "# Skipping to table comments at : %0000x" % int(offset)
            self.frm_file.seek(offset, 0)
            data = self.frm_file.read(1)
        except Exception, error:
            if self.verbosity > 1:
                print "EXCEPTION:", error
            raise UtilError("Cannot read table comment.")

        comment_len = struct.unpack("<B", data)[0]
        com_chars = struct.unpack("c" * comment_len,
                                  self.frm_file.read(comment_len))
        self.comment_str = "".join(com_chars)
        return True

    def _read_default_values(self):
        """Read the default values for all columns
        """
        offset = self.general_data['IO_SIZE'] + \
            self.general_data['tmp_key_length']
        try:
            # Skip ahead to key section
            if self.verbosity > 1:
                print "# Skipping to default data at : %0000x" % \
                    int(offset + 1)
            self.frm_file.seek(offset + 1, 0)
        except Exception, error:
            if self.verbosity > 1:
                print "EXCEPTION:", error
            raise UtilError("Cannot find default data.")

        num_bytes = self.general_data['rec_length']
        # allow overflow
        self.default_values = self.frm_file.read(num_bytes + 100)

    def _read_engine_data(self):
        """Read the storage engine data.
        """
        # We must calculate the location of the partition information by
        # locating the storage engine name and if it is 'partition' then read
        # the partition string following that.

        offset = self.general_data['IO_SIZE'] + \
            self.general_data['tmp_key_length'] + \
            self.general_data['rec_length']
        try:
            # Skip ahead to key section
            if self.verbosity > 1:
                print "# Skipping to keys at : %0000x" % int(offset + 2)
            self.frm_file.seek(offset + 2, 0)
        except Exception, error:
            if self.verbosity > 1:
                print "EXCEPTION:", error
            raise UtilError("Cannot find engine data.")

        engine_len = struct.unpack("<H", self.frm_file.read(2))[0]

        engine_str = "".join(struct.unpack("c" * engine_len,
                                           self.frm_file.read(engine_len)))

        # Save engine name unless user specified a new engine to use
        if self.engine_str is None:
            if self.new_engine is None:
                self.engine_str = engine_str
            else:
                self.engine_str = self.new_engine

        part_len = struct.unpack("<I", self.frm_file.read(4))[0]
        part_str = "".join(struct.unpack("c" * part_len,
                                         self.frm_file.read(part_len)))
        self.partition_str = " ".join(part_str.split('\n'))

        return True

    def _read_column_names(self, fields_per_screen):
        """Read the table column names.
        """
        # Column names start in 00002152.
        screens_read = 1

        cols = []
        col_in_screen = 0
        for i in range(0, self.num_cols):
            if (col_in_screen == fields_per_screen):
                screens_read += 1
                col_in_screen = 1
                # Do the skips
                self.frm_file.read(8)  # read ahead 8 bytes
                val = '\x20'
                while val == '\x20':  # skip the spaces
                    val = self.frm_file.read(1)
                self.frm_file.read(2)  # read past 2 more bytes
            else:
                col_in_screen += 1
            # get length byte
            col_len = struct.unpack("<B", self.frm_file.read(1))[0]
            col_str = ""
            # Don't copy trailing \x00
            j = 0
            while j < col_len - 1:
                char_found = struct.unpack("c", self.frm_file.read(1))[0]
                col_str += char_found
                j += 1
            # skip trailing \x00 and extra bits except for last col read
            if (i < self.num_cols - 1):
                self.frm_file.read(3)
            cols.append(col_str)
        return (screens_read, cols)

    def _get_decimal_value(self, recpos, col):
        """Get a decimal value from the default column data

        recpos[in]     Position in default row to find data
        col[in]        Column dictionary for the column data

        Returns float - default value retrieved
        """
        # Guard
        if not _is_decimal(col):
            return None
        col_flags = (int(col['flags_extra'] << 8) + col['flags'])
        length = col['bytes_in_col']
        decimals = (col_flags >> _FIELDFLAG_DEC_SHIFT) & _FIELDFLAG_MAX_DEC
        length = length - (1 if decimals else 0) - \
            (1 if (col_flags & _FIELDFLAG_DECIMAL) or (length == 0) else 0)

        # algorithm from bin2decimal()
        #int intg=precision-scale,
        #    intg0=intg/DIG_PER_DEC1, frac0=scale/DIG_PER_DEC1,
        #    intg0x=intg-intg0*DIG_PER_DEC1, frac0x=scale-frac0*DIG_PER_DEC1;
        #
        #return intg0*sizeof(dec1)+dig2bytes[intg0x]+
        #       frac0*sizeof(dec1)+dig2bytes[frac0x];

        intg = length - decimals
        intg0 = intg / _DIG_PER_DEC1
        frac0 = decimals / _DIG_PER_DEC1
        intg0x = intg - (intg0 * _DIG_PER_DEC1)
        frac0x = decimals - (frac0 * _DIG_PER_DEC1)
        int_len = (intg0 * 4 + _DIG2BYTES[intg0x]) - 1  # len of integer part
        frac_len = (frac0 * 4 + _DIG2BYTES[frac0x])   # len of fractional part
        int_val = 0
        shift_num = int_len - 1
        for i in range(0, int_len):
            int_val += ord(self.default_values[recpos + i + 1]) << \
                (shift_num * 8)
            shift_num -= 1
        frac_val = 0
        shift_num = frac_len - 1
        for i in range(0, frac_len):
            frac_val += ord(self.default_values[recpos + int_len + i + 1]) << \
                (shift_num * 8)
            shift_num -= 1
        return float("%s.%s" % (int_val, frac_val))

    def _get_field_defaults(self):
        """Retrieve the default values for the columns.
        """
        max_len = len(self.default_values)
        if self.verbosity > 2:
            _print_default_values(self.default_values)
        for i in range(0, len(self.column_data)):
            col = self.column_data[i]
            recpos = self.column_data[i]['recpos']
            recpos -= 2
            if recpos < 0:
                recpos = 0
            if recpos > max_len:  # safety net
                continue

            # Read default for decimal types
            if _is_decimal(col):
                col['default'] = self._get_decimal_value(recpos, col)
                continue
            len_pos, size = _get_pack_length(col)
            field_cs_num = (col['charset_low'] << 8) + col['charset']
            # Adjust size based on character set maximum length per char
            if _is_cs_enabled(col):
                if self.csi:
                    maxlen = self.csi.get_maxlen(field_cs_num)
                else:
                    maxlen = 1
                size = size / maxlen
            if len_pos is None:
                value = self.default_values[recpos:recpos + size]
            else:
                value = self.default_values[recpos:recpos + len_pos + size]

            # Read default for double type
            if col['field_type'] == _MYSQL_TYPE_DOUBLE:
                col['default'] = struct.unpack('d', value)[0]
                continue

            # Read default for float type
            if col['field_type'] == _MYSQL_TYPE_FLOAT:
                col['default'] = struct.unpack('f', value)[0]
                continue

            # Need to check for column type. Some are binary!
            if len_pos is None:  # Some form of string
                col_str = ""
                for col_def in range(0, len(value)):
                    if value[col_def] != '\x20':
                        col_str += value[col_def]
                col['default'] = '' if len(col_str) == 0 else col_str
            elif len_pos == 0:   # packed numeric
                len_pos = size
            if len_pos == 1:
                col['default'] = struct.unpack("<B", value[0:1])[0]
            elif len_pos == 2:
                col['default'] = struct.unpack("<H", value[0:2])[0]
            elif len_pos == 3:
                col['default'] = struct.unpack("<HB", value[0:3])[0]
            elif len_pos == 4:
                col['default'] = struct.unpack("<I", value[0:4])[0]
            elif len_pos == 8:
                col['default'] = struct.unpack("<Q", value[0:8])[0]

    def _read_column_metadata(self):
        """Read the column metadata (size, flags, etc.).

        Returns dictionary - column definition data
        """
        column_data = []
        # Skip ahead
        try:
            for i in range(0, self.num_cols):
                if self.verbosity > 1:
                    print "# Reading column metadata #%s" % i
                data = struct.unpack(_COL_DATA, self.frm_file.read(17))
                data_type = _col_types[bisect.bisect_left(_col_keys,
                                                          data[13])]
                col_def = {
                    'field_length': data[2],  # 1, +3
                    'bytes_in_col': int(data[3]) + (int(data[4]) << 8),
                    'recpos': (int(data[6]) << 8) +
                    (int(data[5])) + (int(data[4]) << 16),
                    'unireg': data[7],  # 1, +8
                    'flags': data[8],  # 1, +9
                    'flags_extra': data[9],  # 1, +10
                    'unireg_type': data[10],  # 1, +11
                    'charset_low': data[11],  # 1, +12
                    'interval_nr': data[12],  # 1, +13
                    'field_type': data[13],  # 1, +14
                    'field_type_name': data_type['text'],
                    'charset': data[14],  # 1, +15
                    'comment_length': data[15],  # 2, +17
                    'enums': [],
                    'comment': "",
                    'default': None,
                }
                column_data.append(col_def)
        except Exception, error:
            if self.verbosity > 1:
                print "EXCEPTION:", error
            raise UtilError("Cannot locate column data")
        return column_data

    def _read_column_data(self):
        """Read the column information from the file.

        This method builds the list of columns including defaults,
        data type, and determines enum and set values.
        """
        # Fields can be found 1 IO_SIZE more than what has been read to date
        # plus 258 bytes.
        io_size = self.general_data['IO_SIZE']
        record_offset = io_size + self.general_data['tmp_key_length'] + \
            self.general_data['rec_length']
        offset = (((record_offset / io_size) + 1) * io_size) + 258
        try:
            # Skip to column position
            if self.verbosity > 1:
                print "# Skipping to column data at : %0000x" % int(offset)
            self.frm_file.seek(offset, 0)
            data = struct.unpack("<HHHHHHHHHHHHH", self.frm_file.read(26))
        except Exception, error:
            if self.verbosity > 1:
                print "EXCEPTION:", error
            raise UtilError("Cannot read column header.")
        self.num_cols = data[0]
        self.col_metadata = {
            'num_cols': data[0],
            'pos': data[1],
            'unknown': data[2],
            'n_length': data[3],
            'interval_count': data[4],
            'interval_parts': data[5],
            'int_length': data[6],
            'com_length': data[8],
            'null_fields': data[12],
        }
        if self.verbosity > 1:
            pprint(self.col_metadata)

        # Skip ahead
        try:
            self.frm_file.read(7)
            fields_per_screen = struct.unpack("<B", self.frm_file.read(1))[0]
            if self.verbosity > 1:
                print "# Fields per screen =", fields_per_screen
            self.frm_file.read(46)
            col_names = self._read_column_names(fields_per_screen)[1]
            self.frm_file.read(1)  # skip 1 byte
            self.column_data = self._read_column_metadata()
        except Exception, error:
            if self.verbosity > 1:
                print "EXCEPTION:", error
            raise UtilError("Cannot read column data.")

        # TODO: Add ability to read defaults by modifying _get_field_defaults
        #       method to correctly read the default values. Currently, it
        #       does not read some non-character values correctly. When fixed,
        #       remove this comment and uncomment the following line.
        # self._get_field_defaults()

        # Skip column names
        col_len = 0
        for colname in col_names:
            col_len += len(colname)
        # Skip to enum section
        self.frm_file.read(len(col_names) + col_len + 2)
        intervals = []
        interval_num = 0
        for i in range(0, len(col_names)):
            self.column_data[i]['name'] = col_names[i]
            # Here we read enums and match them to interval_nr.
            i_num = self.column_data[i]['interval_nr']
            if int(i_num) > 0:
                if interval_num < i_num:
                    interval_num += 1
                    cols = []
                    char_found = 99
                    col_str = ''
                    while not char_found == 0:
                        char_found = struct.unpack("B",
                                                   self.frm_file.read(1))[0]
                        if char_found == 255:
                            if len(col_str):
                                cols.append(col_str)
                                col_str = ''
                        else:
                            col_str += chr(char_found)
                    intervals.append(cols)
                self.column_data[i]['enums'].extend(
                    intervals[interval_num - 1])

        # Now read column comments
        for i in range(0, len(col_names)):
            if self.verbosity > 1:
                print "# Column comment:", \
                    self.column_data[i]['comment_length']
            if self.column_data[i]['comment_length'] > 0:
                col_str = ''
                for j in range(0, self.column_data[i]['comment_length']):
                    if self.verbosity > 3:
                        print "# Reading column data %s." % j
                    char_found = struct.unpack("B", self.frm_file.read(1))[0]
                    col_str += chr(char_found)
                self.column_data[i]['comment'] = col_str

        return True

    def _get_charset_collation(self, col):
        """Get the character set and collation for column

        col[in]        Column data dictionary

        Returns list - option strings for charset and collation if needed
        """
        parts = []
        field_cs_num = (col['charset_low'] << 8) + col['charset']
        table_cs_num = self.general_data['table_charset']
        # If no character set information, add unknown tag to prompt user
        if self.csi is None:
            if field_cs_num is not None and table_cs_num is not None and \
               field_cs_num != 'binary' and table_cs_num != field_cs_num:
                parts.append(" CHARACTER SET <UNKNOWN>")
            return parts
        field_cs_name = self.csi.get_name(field_cs_num)
        table_cs_name = self.csi.get_name(table_cs_num)
        if field_cs_name is not None and table_cs_name is not None and \
           field_cs_name != 'binary' and table_cs_name != field_cs_name:
            parts.append(" CHARACTER SET `%s`" % field_cs_name)

        elif (field_cs_name is None or table_cs_name is None) and \
                not self.quiet:
            print "C",
            print "# WARNING: Cannot get character set name for id =", id
            parts.append(" CHARACTER SET <UNKNOWN>")
        else:
            parts.append("")

        # Get collation
        def_field_col = self.csi.get_default_collation(field_cs_num)
        field_col = self.csi.get_collation(field_cs_num)
        if def_field_col is not None and field_col is not None and \
           def_field_col[1] != field_col:
            parts.append(" COLLATE `%s`" % field_col)
        elif def_field_col is None and not self.quiet:
            print "# WARNING: Cannot get default collation for id =", id
        elif field_col is None and not self.quiet:
            print "# WARNING: Cannot get collation for id =", id
        else:
            parts.append("")

        return parts

    def _get_column_definitions(self):
        """Build the column definitions

        This method constructs the column definitions from the column data
        read from the file.

        Returns list of strings - column definitions
        """
        columns = []
        stop = len(self.column_data)
        for i in range(0, stop):
            col = self.column_data[i]
            col_flags = (int(col['flags_extra'] << 8) + col['flags'])
            length = int(col['bytes_in_col'])
            # Here we need to check for charset maxlen and adjust accordingly
            field_cs_num = (col['charset_low'] << 8) + col['charset']
            if self.csi:
                maxlen = self.csi.get_maxlen(field_cs_num)
            else:
                maxlen = 1
            # Only convert the length for character type fields
            if _is_cs_enabled(col):
                length = length / maxlen
            decimals = int((col_flags >> _FIELDFLAG_DEC_SHIFT) &
                           _FIELDFLAG_MAX_DEC)
            col_parts = []
            # name, data type, length
            # If enum or set values, put those in definition
            if col['enums']:
                col_str = "  `%s` %s(" % (col['name'], col['field_type_name'])
                col_str += ",".join(["'%s'" % i for i in col['enums']])
                col_str += ")"
                col_parts.append(col_str)
            elif _is_no_parens(col) and not _is_blob(col):
                col_parts.append("  `%s` %s" %
                                 (col['name'],
                                  col['field_type_name'].lower()))
            # for blobs
            elif _is_blob(col):
                col_parts.append("  `%s` %s" % (col['name'],
                                                _get_blob_text(col)))
            # for real types:
            elif _is_real(col):
                length_str = ""
                if _is_decimal(col):
                    length = length - (1 if decimals else 0) - \
                        (1 if (col_flags & _FIELDFLAG_DECIMAL) or
                         (length == 0) else 0)
                if decimals == _FIELDFLAG_MAX_DEC:
                    if not col['field_type_name'].upper() in \
                       ["FLOAT", "DOUBLE"]:
                        length_str = "(%s)" % length
                else:
                    length_str = "(%s,%s)" % (length, decimals)
                col_parts.append("  `%s` %s%s" %
                                 (col['name'],
                                  col['field_type_name'].lower(),
                                  length_str))
            else:
                col_parts.append("  `%s` %s(%s)" % (col['name'],
                                 col['field_type_name'].lower(),
                                 length))

            # unsigned
            if col_flags & _FIELDFLAG_DECIMAL == 0 and _is_unsigned(col):
                col_parts.append(" unsigned")

            # zerofill
            if col_flags & _FIELDFLAG_ZEROFILL and _is_unsigned(col):
                col_parts.append(" zerofill")

            # character set and collation options
            if _is_cs_enabled(col):
                col_parts.extend(self._get_charset_collation(col))

            # null
            if col_flags & _FIELDFLAG_MAYBE_NULL:
                if not col['default']:
                    col_parts.append(" DEFAULT NULL")
            elif not _is_blob(col):
                col_parts.append(" NOT NULL")

            # default - Check the _FIELDFLAG_NO_DEFAULT flag. If this flag
            #           is set, there is no default.
            default = col['default']
            if col['field_type'] in [_MYSQL_TYPE_TIMESTAMP,
                                     _MYSQL_TYPE_TIMESTAMP2]:
                col_parts.append(" DEFAULT CURRENT_TIMESTAMP "
                                 "ON UPDATE CURRENT_TIMESTAMP")
            elif col_flags & _FIELDFLAG_NO_DEFAULT == 0 and \
                    default is not None:
                col_parts.append(_format_default(col, col_flags,
                                                 length, decimals))

            # auto increment
            if col['unireg_type'] == _NEXT_NUMBER:
                col_parts.append(" AUTO_INCREMENT")

            if len(col['comment']) > 0:
                col_parts.append(" comment '%s'" % col['comment'])

            # if not the last column or if there are keys, append comma
            if i < stop - 1 or self.key_data['num_keys'] > 0:
                col_parts.append(",")
            col_parts.append(" ")
            columns.append("".join(col_parts))

        return columns

    def _get_key_size(self, col, key_info, flags):
        """Get the key size option for column

        col[in]        Column data dictionary
        key_info[in]   Key information
        flags[in]      Key flags

        Returns string - string of (N) for size or None for no size information
        """
        size_info = None
        if _no_keysize(col) or self.csi is None:
            return size_info
        key_len = int(key_info['length'])
        pack_len = _get_pack_length(col)
        if col['field_type_name'].upper() == "VARCHAR":
            field_len = int(col['field_length'])
        elif (_is_real(col) or _is_unsigned(col) or _is_decimal(col)) and \
                pack_len[0]:
            field_len = int(pack_len[0])
        else:
            field_len = int(pack_len[1])
        field_cs_num = (col['charset_low'] << 8) + col['charset']
        if self.csi:
            maxlen = self.csi.get_maxlen(field_cs_num)
        else:
            maxlen = 1

        # Geometry is an exception
        if col['field_type'] == _MYSQL_TYPE_GEOMETRY:
            if self.csi:
                size_info = "(%d)" % key_len
            else:
                size_info = "(UNKNOWN)"

        elif not field_len == key_len and \
                not int(flags) & _HA_FULLTEXT and not int(flags) & _HA_SPATIAL:
            if self.csi:
                size_info = "(%d)" % (key_len / maxlen)
            else:
                size_info = "(UNKNOWN)"
        return size_info

    def _get_key_columns(self):
        """Build the key column definitions

        This method constructs the key definitions from the column data
        read from the file.

        Returns list of strings - key column definitions
        """
        keys = []
        key_info = zip(self.key_data['key_names'], self.key_data['keys'])
        num_keys = len(key_info)
        i = 0
        for key, info in key_info:
            if key == "PRIMARY":
                key_prefix = "PRIMARY KEY"
            elif not info['flags'] & _HA_NOSAME:
                key_prefix = "UNIQUE KEY"
            else:
                key_prefix = "KEY"
            key_str = "%s `%s` (%s)"
            key_cols = ""
            for k in range(0, len(info['key_parts'])):
                key_part = info['key_parts'][k]
                col = self.column_data[key_part['field_num'] - 1]
                key_cols += "`%s`" % col['name']
                size_str = self._get_key_size(col, key_part, info['flags'])
                if size_str:
                    key_cols += size_str
                if k < len(info['key_parts']) - 1:
                    key_cols += ","
            algorithm = _KEY_ALG[info['algorithm']]
            if not algorithm == 'UNDEFINED':
                key_str += " USING %s" % algorithm
            if i < num_keys - 1:
                key_str += ","
            keys.append(key_str % (key_prefix, key, key_cols))
            i += 1
        return keys

    def _get_table_options(self):
        """Read the general table options from the file.

        Returns string - options string for CREATE statement
        """
        options = []

        gen = self.general_data   # short name to save indent, space

        options.append(") ENGINE=%s" % self.engine_str)

        if self.partition_str is not None and len(self.partition_str):
            options.append("%s" % self.partition_str)

        if gen['avg_row_length'] > 0:
            options.append("AVG_ROW_LENGTH = %s" % gen['avg_row_length'])

        if gen['key_block_size'] > 0:
            options.append("KEY_BLOCK_SIZE = %s" % gen['key_block_size'])

        if gen['max_rows'] > 0:
            options.append("MAX_ROWS = %s" % gen['max_rows'])

        if gen['min_rows'] > 0:
            options.append("MIN_ROWS = %s" % gen['min_rows'])

        if gen['default_charset'] > 0:
            # If no character set information, add unknown tag to prompt user
            if self.csi:
                c_id = int(gen['default_charset'])
                cs_name = self.csi.get_name(c_id)
                if cs_name is not None:
                    options.append("DEFAULT CHARSET=%s" % cs_name)
                elif not self.quiet:
                    print "# WARNING: Cannot find character set by id =", c_id

                # collation
                def_col = self.csi.get_default_collation(c_id)
                col = self.csi.get_collation(c_id)
                if def_col is not None and col is not None and def_col != col:
                    options.append("COLLATE=`%s`" % col)
                elif def_col is None and not self.quiet:
                    print "# WARNING: Cannot find default collation " + \
                          "for table using id =", c_id
                elif col is None and not self.quiet:
                    print "# WARNING: Cannot find collation for table " + \
                        "using id =", c_id

        row_format = ""
        row_type = int(gen['row_type'])
        if row_type == _ROW_TYPE_FIXED:
            row_format = "FIXED"
        elif row_type == _ROW_TYPE_DYNAMIC:
            row_format = "DYNAMIC"
        elif row_type == _ROW_TYPE_COMPRESSED:
            row_format = "COMPRESSED"
        elif row_type == _ROW_TYPE_REDUNDANT:
            row_format = "REDUNDANT"
        elif row_type == _ROW_TYPE_COMPACT:
            row_format = "COMPACT"
        if len(row_format) > 0:
            options.append("ROW_FORMAT = %s" % row_type)

        if self.comment_str is not None and len(self.comment_str):
            options.append("COMMENT '%s'" % self.comment_str)

        if len(options) > 1:
            return options[0] + " " + ", ".join(options[1:]) + ";"
        return options[0] + ";"

    def _build_create_statement(self):
        """Build the create statement for the .frm file.

        This method builds the CREATE TABLE information as read from
        the file.

        Returns string - CREATE TABLE string
        """
        if self.general_data is None:
            raise UtilError("Header information missing.")

        # CREATE statement preamble
        parts = []

        # Create preamble
        preamble = "CREATE TABLE %s`%s` ("
        if self.db_name is not None and len(self.db_name) > 1:
            db_str = "`%s`." % self.db_name
        else:
            db_str = ""
        parts.append(preamble % (db_str, self.table))

        # Get columns
        parts.extend(self._get_column_definitions())

        # Get indexes
        parts.extend(self._get_key_columns())

        # Create postamble and table options
        parts.append(self._get_table_options())

        return "\n".join(parts)

    def get_type(self):
        """Return the file type - TABLE or VIEW
        """
        # Fail if we cannot read the file
        try:
            self.frm_file = open(self.frm_path, "rb")
        except Exception, error:
            raise UtilError("The file %s cannot be read.\n%s" %
                            (self.frm_path, error))

        # Read the file type
        file_type = struct.unpack("<H", self.frm_file.read(2))[0]

        # Close file and exit
        self.frm_file.close()

        # Take action based on file type
        if file_type == _TABLE_TYPE:
            return "TABLE"
        elif file_type == _VIEW_TYPE:
            return "VIEW"
        else:
            return "UNKNOWN"

    def show_statistics(self):
        """Show general file and table statistics
        """

        print "# File Statistics:"
        file_stats = os.stat(self.frm_path)
        file_info = {
            'Size': file_stats[stat.ST_SIZE],
            'Last Modified': time.ctime(file_stats[stat.ST_MTIME]),
            'Last Accessed': time.ctime(file_stats[stat.ST_ATIME]),
            'Creation Time': time.ctime(file_stats[stat.ST_CTIME]),
            'Mode': file_stats[stat.ST_MODE],
        }
        for value, data in file_info.iteritems():
            print "#%22s : %s" % (value, data)
        print

        # Fail if we cannot read the file
        try:
            self.frm_file = open(self.frm_path, "rb")
        except Exception, error:
            raise UtilError("The file %s cannot be read.\n%s" %
                            (self.frm_path, error))

        # Read the file type
        file_type = struct.unpack("<H", self.frm_file.read(2))[0]

        # Take action based on file type
        if not file_type == _TABLE_TYPE:
            return

        # Read general information
        self._read_header()

        # Close file and exit
        self.frm_file.close()

        version = str(self.general_data['MYSQL_VERSION_ID'])
        ver_str = "%d.%d.%d" % (int(version[0]), int(version[1:3]),
                                int(version[3:]))
        def_part_eng = 'None'
        if self.general_data['default_part_eng'] > 0:
            def_part_eng = _engine_types[bisect.bisect_left(
                _engine_keys,
                self.general_data['default_part_eng'])]['text']
        print "# Table Statistics:"
        table_info = {
            'MySQL Version': ver_str,
            'frm Version': self.general_data['frm_version'],
            'Engine': self.general_data['legacy_db_type'],
            'IO_SIZE': self.general_data['IO_SIZE'],
            'frm File_Version': self.general_data['frm_file_ver'],
            'Def Partition Engine': def_part_eng,
        }
        for value, data in table_info.iteritems():
            print "#%22s : %s" % (value, data)
        print

    def show_create_table_statement(self):
        """Show the CREATE TABLE statement

        This method reads the .frm file specified in the constructor and
        builds a fascimile CREATE TABLE statement if the .frm file describes
        a table. For views, the method displays the CREATE VIEW statement
        contained in the file.
        """
        if not self.quiet:
            print "# Reading .frm file for %s:" % self.frm_path

        # Fail if we cannot read the file
        try:
            self.frm_file = open(self.frm_path, "rb")
        except Exception, error:
            raise UtilError("The file %s cannot be read.\n%s" %
                            (self.frm_path, error))

        # Read the file type
        file_type = struct.unpack("<H", self.frm_file.read(2))[0]

        # Take action based on file type
        if file_type == _TABLE_TYPE:
            if not self.quiet:
                print "# The .frm file is a TABLE."

            # Read general information
            self._read_header()
            if self.verbosity > 1:
                print "# General Data from .frm file:"
                pprint(self.general_data)

            # Read key information
            self._read_keys()
            if self.verbosity > 1:
                print "# Index (key) Data from .frm file:"
                pprint(self.key_data)

            # Read default field values information
            self._read_default_values()

            # Read partition information
            self._read_engine_data()
            if self.verbosity > 1:
                print "# Engine string:", self.engine_str
                print "# Partition string:", self.partition_str

            # Read column information
            self._read_column_data()
            if self.verbosity > 1:
                print "# Column Data from .frm file:"
                pprint(self.column_data)
                print "# Number of columns:", self.num_cols
                pprint(self.column_data[1:])

            # Read comment
            self._read_comment()
            if self.verbosity > 1:
                print "# Comment:", self.comment_str

            if self.csi is not None and self.verbosity > 2:
                print "# Character sets read from server:"
                self.csi.print_charsets()

            create_table_statement = self._build_create_statement()
            if not self.quiet:
                print "# CREATE TABLE Statement:\n"
            print create_table_statement
            print

        elif file_type == _VIEW_TYPE:
            # Skip heading
            self.frm_file.read(8)
            view_data = {}
            for line in self.frm_file.readlines():
                field, value = line.strip('\n').split("=", 1)
                view_data[field] = value
            if self.verbosity > 1:
                pprint(view_data)
            if not self.quiet:
                print "# CREATE VIEW Statement:\n"
            print view_data['query']
            print
        else:
            raise UtilError("Invalid file type. Magic bytes = %02x" %
                            file_type)

        # Close file and exit
        self.frm_file.close()

    def change_storage_engine(self):
        """Change the storage engine in an .frm file to MEMORY

        This method edits a .frm file to change the storage engine to the
        the MEMORY engine.

        CAUTION: Method will change the contents of the file.

        Returns tuple - (original engine type, original engine name,
                         sever version from the file)
        """
        # Here we must change the code in position 0x03 to the engine code
        # and the engine string in body of the file (Calculated location)
        if self.verbosity > 1 and not self.quiet:
            print "# Changing engine for .frm file %s:" % self.frm_path

        # Fail if we cannot read the file
        try:
            self.frm_file = open(self.frm_path, "r+b")
        except Exception, error:
            raise UtilError("The file %s cannot be read.\n%s" %
                            (self.frm_path, error))

        # Read the file type
        file_type = struct.unpack("<H", self.frm_file.read(2))[0]

        # Do nothing if this is a view.
        if file_type == _VIEW_TYPE:
            return None

        # Abort if not table.
        if not file_type == _TABLE_TYPE:
            raise UtilError("Invalid file type. Magic bytes = %02x" %
                            file_type)

        # Replace engine value
        self.frm_file.read(1)  # skip 1 byte
        engine_type = struct.unpack("<B", self.frm_file.read(1))[0]

        # Read general information
        self._read_header()
        if self.verbosity > 1:
            print "# General Data from .frm file:"
            pprint(self.general_data)

        engine_str = ""
        server_version = str(self.general_data['MYSQL_VERSION_ID'])

        offset = self.general_data['IO_SIZE'] + \
            self.general_data['tmp_key_length'] + \
            self.general_data['rec_length']

        self.frm_file.seek(offset + 2, 0)

        engine_len = struct.unpack("<H", self.frm_file.read(2))[0]
        engine_str = "".join(struct.unpack("c" * engine_len,
                                           self.frm_file.read(engine_len)))
        if self.verbosity > 1:
            print "# Engine string:", engine_str

        # If this is a CSV storage engine, don't change the engine type
        # and instead create an empty .CSV file
        if engine_type == _DB_TYPE_CSV_DB:
            new_csv = os.path.splitext(self.frm_path)
            f_out = open(new_csv[0] + ".CSV", "w")
            f_out.close()
        elif engine_type == _DB_TYPE_ARCHIVE_DB:
            new_csv = os.path.splitext(self.frm_path)
            f_out = open(new_csv[0] + ".ARZ", "w")
            f_out.close()
        elif engine_type == _DB_TYPE_MRG_MYISAM:
            new_csv = os.path.splitext(self.frm_path)
            f_out = open(new_csv[0] + ".MRG", "w")
            f_out.close()
        elif engine_type == _DB_TYPE_BLACKHOLE_DB:
            pass  # Nothing to do for black hole storage engine
        else:
            # Write memory type
            self.frm_file.seek(3)
            self.frm_file.write(struct.pack("<B", 6))

            # Write memory name
            self.frm_file.seek(offset + 2, 0)
            self.frm_file.write(struct.pack("<H", 6))
            self.frm_file.write("MEMORY")

        # Close file and exit
        self.frm_file.close()

        return engine_type, engine_str, server_version

## parser.py
#
# Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
"""Module with parsers for General and Slow Query Log.
"""

import re
import decimal
import datetime

from mysql.utilities.exception import LogParserError


_DATE_PAT = r"\d{6}\s+\d{1,2}:\d{2}:\d{2}"

_HEADER_VERSION_CRE = re.compile(
    r"(.+), Version: (\d+)\.(\d+)\.(\d+)(?:-(\S+))?")
_HEADER_SERVER_CRE = re.compile(r"Tcp port:\s*(\d+)\s+Unix socket:\s+(.*)")

_SLOW_TIMESTAMP_CRE = re.compile(r"#\s+Time:\s+(" + _DATE_PAT + r")")
_SLOW_USERHOST_CRE = re.compile(r"#\s+User@Host:\s+"
                                r"(?:([\w\d]+))?\s*"
                                r"\[\s*([\w\d]+)\s*\]\s*"
                                r"@\s*"
                                r"([\w\d\.\-]*)\s*"
                                r"\[\s*([\d.]*)\s*\]\s*"
                                r"(?:Id\:\s*(\d+)?\s*)?")
_SLOW_STATS_CRE = re.compile(r"#\sQuery_time:\s(\d*\.\d{1,6})\s*"
                             r"Lock_time:\s(\d*\.\d{1,6})\s*"
                             r"Rows_sent:\s(\d*)\s*"
                             r"Rows_examined:\s(\d*)")

_GENERAL_ENTRY_CRE = re.compile(
    r'(?:(' + _DATE_PAT + r'))?\s*'
    r'(\d+)\s([\w ]+)\t*(?:(.+))?$')


class LogParserBase(object):
    """Base class for parsing MySQL log files

    LogParserBase should be inherited to create parsers for MySQL log files.
    This class has the following capabilities:

    - Take a stream and check whether it is a file type
    - Retrieve next line from stream
    - Parse header information from a log file (for General or Slow Query Log)
    - Implements the iterator protocol

    This class should not be used directly, but inhereted and extended to
    match the log file which needs to be parsed.
    """
    def __init__(self, stream):
        """Constructor

        stream[in]          A file type

        The stream argument must be a valid file type supporting for
        example the readline()-method. For example, the return of the buildin
        function open() can be used:
            LogParserBase(open("/path/to/mysql.log"))

        Raises LogParserError on errors.
        """
        self._stream = None
        self._version = None
        self._program = None
        self._port = None
        self._socket = None
        self._start_datetime = None
        self._last_seen_datetime = None

        # Check if we got a file type
        line = None
        try:
            self._stream = stream
            line = self._get_next_line()
        except AttributeError:
            raise LogParserError("Need a file type")

        # Not every log file starts with a header
        if line is not None and line.endswith('started with:'):
            self._parse_header(line)
        else:
            self._stream.seek(0)

    def _get_next_line(self):
        """Get next line from the log file

        This method reads the next line from the stream. Trailing
        newline (\n) and carraige return (\r) are removed.

        Returns next line as string or None
        """
        line = self._stream.readline()
        if not line:
            return None
        return line.rstrip('\r\n')

    def _parse_header(self, line):
        """Parse the header of a MySQL log file

        line[in]        A string, usually result of self._get_next_line()

        This method parses the header of a MySQL log file, that is the header
        found in the General and Slow Query log files. It sets attributes
        _version, _program, _port and _socket.
        Note that headers can repeat in a log file, for example, after a
        restart of the MySQL server.

        Example header:
        /usr/sbin/mysqld, Version: 5.5.17-log (Source distribution). started
        with:
        Tcp port: 0  Unix socket: /tmp/mysql.sock
        Time                 Id Command    Argument

        Raises LogParserError on errors.
        """
        if line is None:
            return
        # Header line containing executable and version, example:
        # /raid0/mysql/mysql/bin/mysqld,
        # Version: 5.5.17-log (Source distribution). started with:
        info = _HEADER_VERSION_CRE.match(line)
        if not info:
            raise LogParserError("Could not read executable and version from "
                                 "header")
        program, major, minor, patch, extra = info.groups()

        # Header line with server information, example:
        # Tcp port: 3306  Unix socket: /tmp/mysql.sock
        line = self._get_next_line()
        info = _HEADER_SERVER_CRE.match(line)
        if not info:
            raise LogParserError("Malformed server header line: %s" % line)
        tcp_port, unix_socket = info.groups()

        # Throw away column header line, example:
        # Time                 Id Command    Argument
        self._get_next_line()

        self._version = (int(major), int(minor), int(patch), extra)
        self._program = program
        self._port = int(tcp_port)
        self._socket = unix_socket

    @property
    def version(self):
        """Returns the MySQL server version

        This property returns a tuple descriving the version of the
        MySQL server producing the log file. The tuple looks like this:
            (major, minor, patch, extra)

        The extra part is optional and when not available will be None.
        Examples:
            (5,5,17,'log')
            (5,1,57,None)

        Note that the version can change in the same log file.

        Returns a tuple or None.
        """
        return self._version

    @property
    def program(self):
        """Returns the executable which wrote the log file

        This property returns the full path to the executable which
        produced the log file.

        Note that the executable can change in the same log file.

        Returns a string or None.
        """
        return self._program

    @property
    def port(self):
        """Returns the MySQL server TCP/IP port

        This property returns the TCP/IP port on which the MySQL server
        was listening.

        Note that the TCP/IP port can change in the same log file.

        Returns an integer or None.
        """
        return self._port

    @property
    def socket(self):
        """Returns the MySQL server UNIX socket

        This property returns full path to UNIX socket used the MySQL server
        to accept incoming connections on UNIX-like servers.

        Note that the UNIX socket location can change in the same log file.

        Returns a string or None.
        """
        return self._socket

    @property
    def start_datetime(self):
        """Returns timestamp of first read log entry

        This property returns the timestamp of the first read log entry.

        Returns datetime.datetime-object or None.
        """
        return self._start_datetime

    @property
    def last_seen_datetime(self):
        """Returns timestamp of last read log entry

        This property returns the timestamp of the last read log entry.

        Returns datetime.datetime-object or None
        """
        return self._last_seen_datetime

    def __iter__(self):
        """Class is iterable

        Returns a LogParserBase-object.
        """
        return self

    def next(self):
        """Returns the next log entry

        Raises StopIteration when no more entries are available.

        Returns a LogEntryBase-object.
        """
        entry = self._parse_entry()
        if entry is None:
            raise StopIteration
        return entry

    def _parse_entry(self):
        """Returns a parsed log entry
        """
        pass

    def __str__(self):
        """String representation of LogParserBase
        """
        return "<%(clsname)s, MySQL v%(version)s>" % dict(
            clsname=self.__class__.__name__,
            version='.'.join([str(v) for v in self._version[0:3]]) +
            (self._version[3] or '')
        )


class GeneralQueryLog(LogParserBase):
    """Class implementing a parser for the MySQL General Query Log

    The GeneralQueryLog-class implements a parse for the MySQL General Query
    Log and has the following capabilities:
    - Parse General Query Log entries
    - Possibility to handle special commands
    - Keep track of MySQL sessions and remove them
    - Process log headers found later in the log file
    """
    def __init__(self, stream):
        """Constructor

        stream[in]      file type

        Raises LogParserError on errors.
        """
        super(GeneralQueryLog, self).__init__(stream)
        self._sessions = {}
        self._cached_logentry = None

        self._commands = {
            #'Sleep': None,
            'Quit': self._handle_quit,
            'Init DB': self._handle_init_db,
            'Query': self._handle_multi_line,
            #'Field List': None,
            #'Create DB': None,
            #'Drop DB': None,
            #'Refresh': None,
            #'Shutdown': None,
            #'Statistics': None,
            #'Processlist': None,
            'Connect': self._handle_connect,
            #'Kill': None,
            #'Debug': None,
            #'Ping': None,
            #'Time': None,
            #'Delayed insert': None,
            #'Change user': None,
            #'Binlog Dump': None,
            #'Table Dump': None,
            #'Connect Out': None,
            #'Register Slave': None,
            'Prepare': self._handle_multi_line,
            'Execute': self._handle_multi_line,
            #'Long Data': None,
            #'Close stmt': None,
            #'Reset stmt': None,
            #'Set option': None,
            'Fetch': self._handle_multi_line,
            #'Daemon': None,
            #'Error': None,
        }

    def _new_session(self, session_id):
        """Create a new session using the given session ID

        session_id[in]      integer presenting a MySQL session

        Returns a dictionary.
        """
        self._sessions[session_id] = dict(
            database=None,
            user=None,
            host=None,
            time_last_action=None,
            to_delete=False
        )
        return self._sessions[session_id]

    @staticmethod
    def _handle_connect(entry, session, argument):
        """Handle a 'Connect'-command

        entry[in]       a GeneralQueryLogEntry-instance
        session[in]     a dictionary with current session information,
                        element of self._sessions
        argument[in]    a string, last part of a log entry

        This method reads user and database information from the argument of
        a 'Connect'-command. It sets the user, host and database for the
        current session and also sets the argument for the entry.

        """
        # Argument can be as follows:
        # root@localhost on test
        # root@localhost on
        try:
            connection, _, database = argument.split(' ')
        except ValueError:
            connection = argument.replace(' on', '')
            database = None
        session['user'], session['host'] = connection.split('@')
        session['database'] = database
        entry['argument'] = argument

    @staticmethod
    def _handle_init_db(entry, session, argument):
        """Handle an 'Init DB'-command

        entry[in]       a GeneralQueryLogEntry-instance
        session[in]     a dictionary with current session information,
                        element of self._sessions
        argument[in]    a string, last part of a log entry

        The argument parameter is always the database name.
        """
        # Example (of full line):
        #           3 Init DB   mysql
        session['database'] = argument
        entry['argument'] = argument

    def _handle_multi_line(self, entry, session, argument):
        """Handle a command which can span multiple lines

        entry[in]       a GeneralQueryLogEntry-instance
        session[in]     a dictionary with current session information,
                        element of self._sessions
        argument[in]    a string, last part of a log entry

        The argument parameter passed to this function is the last part of a
        General Query Log entry and usually is already the full query.

        This function's main purpose is to read log entries which span multiple
        lines, such as the Query and Prepare-commands.
        """
        # Examples:
        # 111205 10:01:14       6 Query SELECT Name FROM time_zone_name
        #                       WHERE Time_zone_id = 417
        # 111205 10:03:28       6 Query SELECT Name FROM time_zone_name
        # WHERE Time_zone_id = 417
        argument_parts = [argument, ]
        line = self._get_next_line()
        while line:
            if line.endswith('started with:'):
                self._cached_logentry = line
                break
            info = _GENERAL_ENTRY_CRE.match(line)
            if info is not None:
                self._cached_logentry = info.groups()
                break
            argument_parts.append(line)
            line = self._get_next_line()

        entry['argument'] = '\n'.join(argument_parts)

    @staticmethod
    def _handle_quit(entry, session, argument):
        """Handle the 'Quit'-command

        entry[in]       a GeneralQueryLogEntry-instance
        session[in]     a dictionary with current session information,
                        element of self._sessions
        argument[in]    a string, last part of a log entry

        This function sets a flag that the session can be removed from the
        session list.
        """
        # Example (of full line):
        # 111205 10:06:53       6 Quit
        session['to_delete'] = True

    def _parse_command(self, logentry, entry):
        """Parse a log entry from the General Query Log

        logentry[in]    a string or tuple
        entry[in]       an instance of GeneralQueryLogEntry

        The logentry-parameter is either a line read from the log file or
        the result of a previous attempt to read a command.
        The entry argument should be an instance of GeneralQueryLogEntry.
        It returns the entry or None if nothing could be read.

        Raises LogParserError on errors.

        Returns the GeneralQueryLogEntry-instance or None
        """
        if logentry is None:
            return None
        if isinstance(logentry, tuple):
            dt, session_id, command, argument = logentry
        elif logentry.endswith('started with:'):
            while logentry.endswith('started with:'):
                # We got a header
                self._parse_header(logentry)
                logentry = self._get_next_line()
                if logentry is None:
                    return None
            return self._parse_command(logentry, entry)
        else:
            info = _GENERAL_ENTRY_CRE.match(logentry)
            if info is None:
                raise LogParserError("Failed parsing command line: %s"
                                     % logentry)
            dt, session_id, command, argument = info.groups()
        self._cached_logentry = None

        session_id = int(session_id)
        entry['session_id'] = session_id
        try:
            session = self._sessions[session_id]
        except KeyError:
            session = self._new_session(session_id)

        entry['command'] = command
        if dt is not None:
            entry['datetime'] = datetime.datetime.strptime(dt,
                                                           "%y%m%d %H:%M:%S")
            session['time_last_action'] = entry['datetime']
        else:
            entry['datetime'] = session['time_last_action']

        try:
            self._commands[command](entry, session, argument)
        except KeyError:
            # Generic command
            entry['argument'] = argument

        for key in entry.keys():
            if key in session:
                entry[key] = session[key]

        if session['to_delete'] is True:
            del self._sessions[session_id]
            del session

        return entry

    def _parse_entry(self):
        """Returns a parsed log entry

        The method _parse_entry() uses _parse_command() to parse
        a General Query Log entry. It is used by the iterator protocol methods.

        Returns a GeneralQueryLogEntry-instance or None.
        """
        entry = GeneralQueryLogEntry()
        if self._cached_logentry is not None:
            self._parse_command(self._cached_logentry, entry)
            return entry
        else:
            line = self._get_next_line()
        if line is None:
            return None

        self._parse_command(line, entry)
        return entry


class SlowQueryLog(LogParserBase):
    """Class implementing a parser for the MySQL Slow Query Log

    The SlowQueryLog-class implements a parser for the MySQL Slow Query Log and
    has the following capabilities:
    - Parse Slow Query Log entries
    - Process log headers found later in the log file
    - Parse connection and temporal information
    - Get statistics of the slow query
    """
    def __init__(self, stream):
        """Constructor

        stream[in]      A file type

        The stream argument must be a valid file type supporting for
        example the readline()-method. For example, the return of the build-in
        function open() can be used:
            SlowQueryLog(open("/path/to/mysql-slow.log"))

        Raises LogParserError on errors.
        """
        super(SlowQueryLog, self).__init__(stream)
        self._cached_line = None
        self._current_database = None

    @staticmethod
    def _parse_line(regex, line):
        """Parses a log line using given regular expression

        regex[in]   a SRE_Match-object
        line[in]    a string

        This function takes a log line and matches the regular expresion given
        with the regex argument. It returns the result of
        re.MatchObject.groups(), which is a tuple.

        Raises LogParserError on errors.

        Returns a tuple.
        """
        info = regex.match(line)
        if info is None:
            raise LogParserError('Failed parsing Slow Query line: %s' %
                                 line[:30])
        return info.groups()

    def _parse_connection_info(self, line, entry):
        """Parses connection info

        line[in]    a string
        entry[in]   a SlowQueryLog-instance

        The line paramater should be a string, a line read from the Slow Query
        Log. The entry argument should be an instance of SlowQueryLogEntry.

        Raises LogParserError on failure.
        """
        # Example:
        # # User@Host: root[root] @ localhost [127.0.0.1]
        (priv_user,
         unpriv_user,
         host,
         ip,
         sid) = self._parse_line(_SLOW_USERHOST_CRE, line)

        entry['user'] = priv_user if priv_user else unpriv_user
        entry['host'] = host if host else ip
        entry['session_id'] = sid

    def _parse_timestamp(self, line, entry):
        """Parses a timestamp

        line[in]    a string
        entry[in]   a SlowQueryLog-instance

        The line paramater should be a string, a line read from the Slow Query
        Log. The entry argument should be an instance of SlowQueryLogEntry.

        Raises LogParserError on failure.
        """
        # Example:
        # # Time: 111206 11:55:54
        info = self._parse_line(_SLOW_TIMESTAMP_CRE, line)

        entry['datetime'] = datetime.datetime.strptime(info[0],
                                                       "%y%m%d %H:%M:%S")
        if self._start_datetime is None:
            self._start_datetime = entry['datetime']
            self._last_seen_datetime = entry['datetime']

    def _parse_statistics(self, line, entry):
        """Parses statistics information

        line[in]    a string
        entry[in]   a SlowQueryLog-instance

        The line paramater should be a string, a line read from the Slow Query
        Log. The entry argument should be an instance of SlowQueryLogEntry.

        Raises LogParserError on errors.
        """
        # Example statistic line:
        # Query_time: 0.101194  Lock_time: 0.000331 Rows_sent: 24
        # Rows_examined: 11624
        result = self._parse_line(_SLOW_STATS_CRE, line)

        entry['query_time'] = decimal.Decimal(result[0])
        entry['lock_time'] = decimal.Decimal(result[1])
        entry['rows_sent'] = int(result[2])
        entry['rows_examined'] = int(result[3])

    def _parse_query(self, line, entry):
        """Parses the query

        line[in]    a string
        entry[in]   a SlowQueryLog-instance

        The line paramater should be a string, a line read from the Slow Query
        Log. The entry argument should be an instance of SlowQueryLogEntry.

        Query entries in the Slow Query Log could span several lines. They can
        optionally start with a USE-command and have session variables, such as
        'timestamp', set before the actual query.
        """
        # Example:
        # SET timestamp=1323169459;
        # SELECT SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA
        #    WHERE SCHEMA_NAME = 'mysql';
        # # User@Host: root[root] @ localhost [127.0.0.1]
        query = []
        while True:
            if line is None:
                break
            if line.startswith('use'):
                entry['database'] = self._current_database = line.split(' ')[1]
            elif line.startswith('SET timestamp='):
                entry['datetime'] = datetime.datetime.fromtimestamp(
                    int(line[14:].strip(';')))
            elif (line.startswith('# Time:') or line.startswith("# User@Host")
                  or line.endswith('started with:')):
                break
            query.append(line)
            line = self._get_next_line()

        if 'database' in entry:
            # This is not always correct: connections without current database
            # will get the database name of the previous query. However, it's
            # more likely current database is set. Fix would be that the server
            # includes a USE-statement for every entry.
            if (entry['database'] is None
               and self._current_database is not None):
                entry['database'] = self._current_database
        entry['query'] = '\n'.join(query)
        self._cached_line = line

    def _parse_entry(self):
        """Parse and returns an entry of the Slow Query Log

        Each entry of the slow log consists of:
        1. An optional time line
        2. A connection information line with user, hostname and database
        3. A line containing statistics for the query
        4. An optional "use <database>" line
        5. A line setting the timestamp, insert_id, and last_insert_id
           session variables
        6. An optional administartor command line "# administator command"
        7. An optional SQL statement or the query

        Returns a SlowQueryLogEntry-instance or None
        """
        if self._cached_line is not None:
            line = self._cached_line
            self._cached_line = None
        else:
            line = self._get_next_line()
        if line is None:
            return None

        while line.endswith('started with:'):
            # We got a header
            self._parse_header(line)
            line = self._get_next_line()
            if line is None:
                return None

        entry = SlowQueryLogEntry()

        if line.startswith('# Time:'):
            self._parse_timestamp(line, entry)
            line = self._get_next_line()

        if line.startswith('# User@Host:'):
            self._parse_connection_info(line, entry)
            line = self._get_next_line()

        if line.startswith('# Query_time:'):
            self._parse_statistics(line, entry)
            line = self._get_next_line()

        self._parse_query(line, entry)

        return entry


class LogEntryBase(dict):
    """Class inherited by GeneralQueryEntryLog and SlowQueryEntryLog

    This class has the following capabilities:
    - Inherits from dict
    - Dictionary elements can be accessed using attributes. For example,
      logentry['database'] is accessible like logentry.database

    Should not be used directly.
    """
    def __init__(self):
        super(LogEntryBase, self).__init__()
        self['datetime'] = None
        self['database'] = None
        self['user'] = None
        self['host'] = None
        self['session_id'] = None

    def __getattr__(self, name):
        if name in self:
            return self[name]
        else:
            raise AttributeError("%s has no attribute '%s'" %
                                 (self.__class__.__name__, name))


class GeneralQueryLogEntry(LogEntryBase):
    """Class representing an entry of the General Query Log

    """
    def __init__(self):
        """Constructor

        GeneralQueryLogEntry inherits from LogEntryBase, which inherits from
        dict. Instances of GeneralQueryLogEntry can be used just like
        dictionaries.
        """
        super(GeneralQueryLogEntry, self).__init__()
        self['session_id'] = None
        self['command'] = None
        self['argument'] = None

    def __str__(self):
        """String representation of GeneralQueryLogEntry
        """
        param = self.copy()
        param['clsname'] = self.__class__.__name__
        try:
            if len(param['argument']) > 30:
                param['argument'] = param['argument'][:28] + '..'
        except TypeError:
            pass  # Nevermind when param['argument'] was not a string.
        try:
            param['datetime'] = param['datetime'].strftime("%Y-%m-%d %H:%M:%S")
        except AttributeError:
            param['datetime'] = ''
        return ("<%(clsname)s %(datetime)s [%(session_id)s]"
                " %(command)s: %(argument)s>" % param)


class SlowQueryLogEntry(LogEntryBase):
    """Class representing an entry of the Slow Query Log

    SlowQueryLogEntry inherits from LogEntryBase, which inherits from dict.
    Instances of SlowQueryLogEntry can be used just like dictionaries.
    """
    def __init__(self):
        """Constructor
        """
        super(SlowQueryLogEntry, self).__init__()
        self['query'] = None
        self['query_time'] = None
        self['lock_time'] = None
        self['rows_examined'] = None
        self['rows_sent'] = None

    def __str__(self):
        """String representation of SlowQueryLogEntry
        """
        param = self.copy()
        param['clsname'] = self.__class__.__name__
        try:
            param['datetime'] = param['datetime'].strftime("%Y-%m-%d %H:%M:%S")
        except AttributeError:
            param['datetime'] = ''
        return ("<%(clsname)s %(datetime)s [%(user)s@%(host)s] "
                "%(query_time)s/%(lock_time)s/%(rows_examined)s/%(rows_sent)s>"
                ) % param

## table.py
#
# Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#

"""
This module contains abstractions of a MySQL table and an index.
"""

import multiprocessing
import sys

from mysql.utilities.exception import UtilError
from mysql.connector.conversion import MySQLConverter
from mysql.utilities.common.format import print_list
from mysql.utilities.common.database import Database
from mysql.utilities.common.lock import Lock
from mysql.utilities.common.server import Server
from mysql.utilities.common.sql_transform import (convert_special_characters,
                                                  quote_with_backticks,
                                                  remove_backtick_quoting,
                                                  is_quoted_with_backticks)

# Constants
_MAXPACKET_SIZE = 1024 * 1024
_MAXBULK_VALUES = 25000
_MAXTHREADS_INSERT = 6
_MAXROWS_PER_THREAD = 100000
_MAXAVERAGE_CALC = 100

_FOREIGN_KEY_QUERY = """
  SELECT CONSTRAINT_NAME, COLUMN_NAME, REFERENCED_TABLE_SCHEMA,
         REFERENCED_TABLE_NAME, REFERENCED_COLUMN_NAME
  FROM INFORMATION_SCHEMA.KEY_COLUMN_USAGE
  WHERE TABLE_SCHEMA = '%s' AND TABLE_NAME = '%s' AND
        REFERENCED_TABLE_SCHEMA IS NOT NULL
"""


class Index(object):
    """
    The Index class encapsulates an index for a given table as defined by
    the output of SHOW INDEXES FROM. The class has the following
    capabilities:

        - Check for duplicates
        - Create DROP statement for index
        - Print index CREATE statement
    """

    def __init__(self, db, index_tuple, verbose=False):
        """Constructor

        db[in]             Name of database
        index_tuple[in]    A tuple from the get_tbl_indexes() result set
        verbose[in]        print extra data during operations (optional)
                           default value = False
        """

        # Initialize and save values
        self.db = db
        self.q_db = quote_with_backticks(db)
        self.verbose = verbose
        self.columns = []
        self.table = index_tuple[0]
        self.q_table = quote_with_backticks(index_tuple[0])
        self.unique = not index_tuple[1]
        self.name = index_tuple[2]
        self.q_name = quote_with_backticks(index_tuple[2])
        col = (index_tuple[4], index_tuple[7])
        self.columns.append(col)
        self.type = index_tuple[10]
        self.compared = False                    # mark as compared for speed
        self.duplicate_of = None                 # saves duplicate index
        if index_tuple[7] > 0:
            self.column_subparts = True          # check subparts e.g. a(20)
        else:
            self.column_subparts = False

    @staticmethod
    def __cmp_columns(col_a, col_b):
        """Compare two columns on name and subpart lengths if present

        col_a[in]          First column to compare
        col_b[in]          Second column to compare

        Returns True if col_a has the same name as col_b and if the
        subparts are col_a.sub <= col_b.sub.
        """

        sz_this = col_a[1]
        sz_that = col_b[1]
        # if column has the same name
        if col_a[0] == col_b[0]:
            # if they both have sub_parts, compare them
            if sz_this and sz_that:
                if sz_this <= sz_that:
                    return True
                else:
                    return False
            # if this index has a sub_part and the other does
            # not, it is potentially redundant
            elif sz_this and sz_that is None:
                return True
            # if neither have sub_parts, it is a match
            elif sz_this is None and sz_that is None:
                return True
        else:
            return False  # no longer a duplicate

    def __check_column_list(self, index):
        """Compare the column list of this index with another

        index[in]          Instance of Index to compare

        Returns True if column list is a subset of index.
        """

        # Uniqueness counts - can't be duplicate if uniquess differs
        #                     except for primary keys which are always unique
        if index.name != "PRIMARY":
            if self.unique != index.unique:
                return False
        num_cols_this = len(self.columns)
        num_cols_that = len(index.columns)
        num_cols_same = 0
        if self.type == "BTREE":
            i = 0
            while (i < num_cols_this) and (i < num_cols_that):
                if num_cols_same <= i:  # Ensures first N cols are the same
                    if self.__cmp_columns(self.columns[i], index.columns[i]):
                        num_cols_same = num_cols_same + 1
                    else:
                        break
                i = i + 1
        else:  # HASH, RTREE, FULLTEXT
            if (self.type == "FULLTEXT") and (num_cols_this != num_cols_that):
                return False
            i = 0
            while (i < num_cols_this) and (i < num_cols_that):
                if self.__cmp_columns(self.columns[i], index.columns[i]):
                    num_cols_same = num_cols_same + 1
                else:  # Ensures column lists must match
                    num_cols_same = 0
                    break
                i = i + 1
        if (num_cols_same > 0) and (num_cols_this <= num_cols_that):
            return True
        return False

    def is_duplicate(self, index):
        """Compare this index with another

        index[in]          Instance of Index to compare

        Returns True if this index is a subset of the Index presented.
        """

        # Don't compare the same index - no two indexes can have the same name
        if self.name == index.name:
            return False
        else:
            return self.__check_column_list(index)

    def contains_columns(self, col_names):
        """Check if the current index contains the columns of the given index.

        Returns True if it contains all the columns of the given index,
        otherwise False.
        """
        if len(self.columns) < len(col_names):
            # If has less columns than given index it does not contain all.
            return False
        else:
            this_col_names = [col[0] for col in self.columns]
            # Check if all index column are included in current one..
            for col_name in col_names:
                if col_name not in this_col_names:
                    return False  # found one column not included.

        # Pass previous verification; contains all the columns of given index.
        return True

    def add_column(self, column, sub_part):
        """Add a column to the list of columns for this index

        column[in]         Column to add
        sub_part[in]       Sub part of colunm e.g. a(20)
        """

        col = (column, sub_part)
        if sub_part > 0:
            self.column_subparts = True
        self.columns.append(col)

    def get_drop_statement(self):
        """Get the drop statement for this index

        Note: Ignores PRIMARY key indexes.

        Returns the DROP statement for this index.
        """
        if self.name == "PRIMARY":
            return None
        query_str = "ALTER TABLE {db}.{table} DROP INDEX {name}".format(
            db=self.q_db, table=self.q_table, name=self.q_name
        )
        return query_str

    def get_remove_columns_statement(self, col_names):
        """Get the ALTER TABLE statement to remove columns for this index.

        col_names[in]   list of columns names to remove from the index.

        Returns the ALTER TABLE statement (DROP/ADD) to remove the given
        columns names from the index.
        """
        # Create the new columns list for the index.
        idx_cols = [col[0] for col in self.columns if col[0] not in col_names]
        if not idx_cols:
            # Return a DROP statement if no columns are left.
            query_str = "ALTER TABLE {db}.{table} DROP INDEX {name}".format(
                db=self.q_db, table=self.q_table, name=self.q_name
            )
        else:
            # Otherwise, return a DROP/ADD statement with remaining columns.
            idx_cols_str = ', '.join(idx_cols)
            query_str = ("ALTER TABLE {db}.{table} DROP INDEX {name}, "
                         "ADD INDEX {name} ({cols})".format(db=self.q_db,
                                                            table=self.q_table,
                                                            name=self.q_name,
                                                            cols=idx_cols_str))
        return query_str

    def __get_column_list(self, backtick_quoting=True):
        """Get the column list for an index

        This method is used to print the CREATE and DROP statements.

        backtick_quoting[in]    Indicates if the columns names are to be quoted
                                with backticks or not. By default: True.

        Returns a string representing the list of columns for a
        column list. e.g. 'a, b(10), c'
        """
        col_list = []
        for col in self.columns:
            name, sub_part = (col[0], col[1])
            if backtick_quoting:
                name = quote_with_backticks(name)
            if sub_part > 0:
                col_str = "{0}({1})".format(name, sub_part)
            else:
                col_str = name
            col_list.append(col_str)
        return ', '.join(col_list)

    def print_index_sql(self):
        """Print the CREATE INDEX for indexes and ALTER TABLE for a primary key
        """
        if self.name == "PRIMARY":
            print("ALTER TABLE {db}.{table} ADD PRIMARY KEY ({cols})"
                  "".format(db=self.q_db, table=self.q_table,
                            cols=self.__get_column_list()))
        else:
            create_str = ("CREATE {unique}{fulltext}INDEX {name} ON "
                          "{db}.{table} ({cols}) {using}")
            unique_str = 'UNIQUE ' if self.unique else ''
            fulltext_str = 'FULLTEXT ' if self.type == 'FULLTEXT' else ''
            if (self.type == "BTREE") or (self.type == "RTREE"):
                using_str = 'USING {0}'.format(self.type)
            else:
                using_str = ''
            print(create_str.format(unique=unique_str, fulltext=fulltext_str,
                                    name=self.q_name, db=self.q_db,
                                    table=self.q_table,
                                    cols=self.__get_column_list(),
                                    using=using_str))

    def get_row(self):
        """Return index information as a list of columns for tabular output.
        """
        cols = self.__get_column_list(backtick_quoting=False)
        return (self.db, self.table, self.name, self.type, cols)


class Table(object):
    """
    The Table class encapsulates a table for a given database. The class
    has the following capabilities:

        - Check to see if the table exists
        - Check indexes for duplicates and redundancies
        - Print list of indexes for the table
        - Extract table data
        - Import table data
        - Copy table data
    """

    def __init__(self, server1, name, options=None):
        """Constructor

        server[in]         A Server object
        name[in]           Name of table in the form (db.table)
        options[in]        options for class: verbose, quiet, get_cols,
            quiet     If True, do not print information messages
            verbose   print extra data during operations (optional)
                      (default is False)
            get_cols  If True, get the column metadata on construction
                      (default is False)
        """
        if options is None:
            options = {}
        self.verbose = options.get('verbose', False)
        self.quiet = options.get('quiet', False)
        self.server = server1

        # Keep table identifier considering backtick quotes
        if is_quoted_with_backticks(name):
            self.q_table = name
            self.q_db_name, self.q_tbl_name = Database.parse_object_name(name)
            self.db_name = remove_backtick_quoting(self.q_db_name)
            self.tbl_name = remove_backtick_quoting(self.q_tbl_name)
            self.table = ".".join([self.db_name, self.tbl_name])
        else:
            self.table = name
            self.db_name, self.tbl_name = Database.parse_object_name(name)
            self.q_db_name = quote_with_backticks(self.db_name)
            self.q_tbl_name = quote_with_backticks(self.tbl_name)
            self.q_table = ".".join([self.q_db_name, self.q_tbl_name])
        self.obj_type = "TABLE"
        self.pri_idx = None

        # We store each type of index in a separate list to make it easier
        # to manipulate
        self.btree_indexes = []
        self.hash_indexes = []
        self.rtree_indexes = []
        self.fulltext_indexes = []
        self.text_columns = []
        self.blob_columns = []
        self.column_format = None
        self.column_names = []
        self.q_column_names = []
        if options.get('get_cols', False):
            self.get_column_metadata()
        self.dest_vals = None
        self.storage_engine = None

        # Get max allowed packet
        res = self.server.exec_query("SELECT @@session.max_allowed_packet")
        if res:
            self.max_packet_size = res[0][0]
        else:
            self.max_packet_size = _MAXPACKET_SIZE
        # Watch for invalid values
        if self.max_packet_size > _MAXPACKET_SIZE:
            self.max_packet_size = _MAXPACKET_SIZE

        self._insert = "INSERT INTO %s.%s VALUES "
        self.query_options = {  # Used for skipping fetch of rows
            'fetch': False
        }

    def exists(self, tbl_name=None):
        """Check to see if the table exists

        tbl_name[in]       table name (db.table)
                           (optional) If omitted, operation is performed
                           on the class instance table name.

        return True = table exists, False = table does not exist
        """

        db, table = (None, None)
        if tbl_name:
            db, table = Database.parse_object_name(tbl_name)
        else:
            db = self.db_name
            table = self.tbl_name
        res = self.server.exec_query("SELECT TABLE_NAME " +
                                     "FROM INFORMATION_SCHEMA.TABLES " +
                                     "WHERE TABLE_SCHEMA = '%s'" % db +
                                     " and TABLE_NAME = '%s'" % table)

        return (res is not None and len(res) >= 1)

    def get_column_metadata(self, columns=None):
        """Get information about the table for the bulk insert operation.

        This method builds lists that describe the metadata of the table. This
        includes lists for:

          column names
          column format for building VALUES clause
          blob fields - for use in generating INSERT/UPDATE for blobs
          text fields - for use in checking for single quotes

        columns[in]        if None, use EXPLAIN else use column list.
        """

        if columns is None:
            columns = self.server.exec_query("explain %s" % self.q_table)
        stop = len(columns)
        self.column_names = []
        self.q_column_names = []
        col_format_values = [''] * stop
        if columns is not None:
            for col in range(0, stop):
                if is_quoted_with_backticks(columns[col][0]):
                    self.column_names.append(
                        remove_backtick_quoting(columns[col][0]))
                    self.q_column_names.append(columns[col][0])
                else:
                    self.column_names.append(columns[col][0])
                    self.q_column_names.append(
                        quote_with_backticks(columns[col][0]))
                col_type_prefix = columns[col][1][0:4].lower()
                if col_type_prefix in ('varc', 'char', 'enum', 'set('):
                    self.text_columns.append(col)
                    col_format_values[col] = "'%s'"
                elif col_type_prefix in ("blob", "text"):
                    self.blob_columns.append(col)
                    col_format_values[col] = "%s"
                elif col_type_prefix in ("date", "time"):
                    col_format_values[col] = "'%s'"
                else:
                    col_format_values[col] = "%s"
        self.column_format = "%s%s%s" % \
                             (" (", ', '.join(col_format_values), ")")

    def get_col_names(self, quote_backticks=False):
        """Get column names for the export operation.

        quote_backticks[in]    If True the column names will be quoted with
                               backticks. Default is False.

        Return (list) column names
        """

        if self.column_format is None:
            self.column_names = []
            self.q_column_names = []
            rows = self.server.exec_query("explain %s" % self.q_table)
            for row in rows:
                self.column_names.append(row[0])
                self.q_column_names.append(quote_with_backticks(row[0]))

        return self.q_column_names if quote_backticks else self.column_names

    def _build_update_blob(self, row, new_db, name, blob_col):
        """Build an UPDATE statement to update blob fields.

        row[in]            a row to process
        new_db[in]         new database name
        name[in]           name of the table
        conn_val[in]       connection information for the destination server
        query[in]          the INSERT string for executemany()
        blob_col[in]       number of the column containing the blob

        Returns UPDATE string
        """
        if self.column_format is None:
            self.get_column_metadata()

        blob_insert = "UPDATE %s.%s SET " % (new_db, name)
        where_values = []
        do_commas = False
        has_data = False
        stop = len(row)
        for col in range(0, stop):
            col_name = self.q_column_names[col]
            if col in self.blob_columns:
                if row[col] is not None and len(row[col]) > 0:
                    if do_commas:
                        blob_insert += ", "
                    blob_insert += "%s = " % col_name + "%s" % \
                                   MySQLConverter().quote(row[col])
                    has_data = True
                    do_commas = True
            else:
                # Convert None values to NULL (not '' to NULL)
                if row[col] is None:
                    value = 'NULL'
                else:
                    value = "'{0}'".format(row[col])
                where_values.append("{0} = {1}".format(col_name, value))
        if has_data:
            return blob_insert + " WHERE " + " AND ".join(where_values) + ";"
        return None

    def get_column_string(self, row, new_db):
        """Return a formatted list of column data.

        row[in]            a row to process
        new_db[in]         new database name

        Returns (string) column list
        """

        if self.column_format is None:
            self.get_column_metadata()

        blob_inserts = []
        values = list(row)

        # Find blobs
        for col in self.blob_columns:
            # Save blob updates for later...
            blob = self._build_update_blob(row, new_db, self.q_tbl_name, col)
            if blob is not None:
                blob_inserts.append(blob)
            values[col] = "NULL"

        # Replace single quotes located in the value for a text field with the
        # correct special character escape sequence. This fixes SQL errors
        # related to using single quotes in a string value that is single
        # quoted. For example, 'this' is it' is changed to 'this\' is it'
        for col in self.text_columns:
            #Check if the value is not None before replacing quotes
            if values[col]:
                # Apply escape sequences to special characters
                values[col] = convert_special_characters(values[col])

        # Build string (add quotes to "string" like types)
        val_str = self.column_format % tuple(values)

        # Change 'None' occurrences with "NULL"
        val_str = val_str.replace(", None", ", NULL")
        val_str = val_str.replace("(None", "(NULL")
        val_str = val_str.replace(", 'None'", ", NULL")
        val_str = val_str.replace("('None'", "(NULL")

        return (val_str, blob_inserts)

    def make_bulk_insert(self, rows, new_db, columns_names=None):
        """Create bulk insert statements for the data

        Reads data from a table (rows) and builds group INSERT statements for
        bulk inserts.

        Note: This method does not print any information to stdout.

        rows[in]           a list of rows to process
        new_db[in]         new database name

        Returns (tuple) - (bulk insert statements, blob data inserts)
        """

        if self.column_format is None:
            self.get_column_metadata()

        data_inserts = []
        blob_inserts = []
        row_count = 0
        data_size = 0
        val_str = None
        for row in rows:
            if row_count == 0:
                if columns_names:
                    insert_str = "INSERT INTO {0}.{1} ({2}) VALUES ".format(
                        new_db, self.q_tbl_name, ", ".join(columns_names)
                    )
                else:
                    insert_str = self._insert % (new_db, self.q_tbl_name)
                if val_str:
                    row_count += 1
                    insert_str += val_str
                data_size = len(insert_str)

            col_data = self.get_column_string(row, new_db)
            val_str = col_data[0]

            if len(col_data[1]) > 0:
                blob_inserts.extend(col_data[1])

            row_size = len(val_str)
            next_size = data_size + row_size + 3
            if (row_count >= _MAXBULK_VALUES) or \
               (next_size > (int(self.max_packet_size) - 512)):  # add buffer
                data_inserts.append(insert_str)
                row_count = 0
            else:
                row_count += 1
                if row_count > 1:
                    insert_str += ", "
                insert_str += val_str
                data_size += row_size + 3

        if row_count > 0:
            data_inserts.append(insert_str)

        return (data_inserts, blob_inserts)

    def get_storage_engine(self):
        """Get the storage engine (in UPPERCASE) for the table.

        Returns the name in UPPERCASE of the storage engine use for the table
        or None if the information is not found.
        """
        self.server.exec_query("USE {0}".format(self.q_db_name),
                               self.query_options)
        res = self.server.exec_query(
            "SHOW TABLE STATUS WHERE name = '{0}'".format(self.tbl_name)
        )
        try:
            # Return store engine converted to UPPER cases.
            return res[0][1].upper() if res[0][1] else None
        except IndexError:
            # Return None if table status information is not available.
            return None

    def get_segment_size(self, num_conn=1):
        """Get the segment size based on number of connections (threads).

        num_conn[in]       Number of threads(connections) to use
                           Default = 1 (one large segment)

        Returns (int) segment_size

                Note: if num_conn <= 1 - returns number of rows
        """

        # Get number of rows
        num_rows = 0
        try:
            res = self.server.exec_query("USE %s" % self.q_db_name,
                                         self.query_options)
        except:
            pass
        res = self.server.exec_query("SHOW TABLE STATUS LIKE '%s'" %
                                     self.tbl_name)
        if res:
            num_rows = int(res[0][4])

        if num_conn <= 1:
            return num_rows

        # Calculate number of threads and segment size to fetch
        thread_limit = num_conn
        if thread_limit > _MAXTHREADS_INSERT:
            thread_limit = _MAXTHREADS_INSERT
        if num_rows > (_MAXROWS_PER_THREAD * thread_limit):
            max_threads = thread_limit
        else:
            max_threads = int(num_rows / _MAXROWS_PER_THREAD)
        if max_threads == 0:
            max_threads = 1
        if max_threads > 1 and self.verbose:
            print "# Using multi-threaded insert option. Number of " \
                  "threads = %d." % max_threads
        return (num_rows / max_threads) + max_threads

    def _bulk_insert(self, rows, new_db, destination=None):
        """Import data using bulk insert

        Reads data from a table and builds group INSERT statements for writing
        to the destination server specified (new_db.name).

        This method is designed to be used in a thread for parallel inserts.
        As such, it requires its own connection to the destination server.

        Note: This method does not print any information to stdout.

        rows[in]           a list of rows to process
        new_db[in]         new database name
        destination[in]    the destination server
        """
        if self.dest_vals is None:
            self.dest_vals = self.get_dest_values(destination)

        # Spawn a new connection
        server_options = {
            'conn_info': self.dest_vals,
            'role': "thread",
        }
        dest = Server(server_options)
        dest.connect()

        # Issue the write lock
        lock_list = [("%s.%s" % (new_db, self.q_tbl_name), 'WRITE')]
        my_lock = Lock(dest, lock_list, {'locking': 'lock-all', })

        # First, turn off foreign keys if turned on
        dest.disable_foreign_key_checks(True)

        if self.column_format is None:
            self.get_column_metadata()

        data_lists = self.make_bulk_insert(rows, new_db)
        insert_data = data_lists[0]
        blob_data = data_lists[1]

        # Insert the data first
        for data_insert in insert_data:
            try:
                dest.exec_query(data_insert, self.query_options)
            except UtilError, e:
                raise UtilError("Problem inserting data. "
                                "Error = %s" % e.errmsg)

        # Now insert the blob data if there is any
        for blob_insert in blob_data:
            try:
                dest.exec_query(blob_insert, self.query_options)
            except UtilError, e:
                raise UtilError("Problem updating blob field. "
                                "Error = %s" % e.errmsg)

        # Now, turn on foreign keys if they were on at the start
        dest.disable_foreign_key_checks(False)
        my_lock.unlock()
        del dest

    def insert_rows(self, rows, new_db, destination=None, spawn=False):
        """Insert rows in the table using bulk copy.

        This method opens a new connect to the destination server to insert
        the data with a bulk copy. If spawn is True, the method spawns a new
        process and returns it. This allows for using a multi-threaded insert
        which can be faster on some platforms. If spawn is False, the method
        will open a new connection to insert the data.

        num_conn[in]       Number of threads(connections) to use for insert
        rows[in]           List of rows to insert
        new_db[in]         Rename the db to this name
        destination[in]    Destination server
                           Default = None (copy to same server)
        spawn[in]          If True, spawn a new process for the insert
                           Default = False

        Returns If spawn == True, process
                If spawn == False, None
        """

        if self.column_format is None:
            self.get_column_metadata()

        if self.dest_vals is None:
            self.dest_vals = self.get_dest_values(destination)

        proc = None
        if spawn:
            proc = multiprocessing.Process(target=self._bulk_insert,
                                           args=(rows, new_db, destination))
        else:
            self._bulk_insert(rows, new_db, destination)

        return proc

    def _clone_data(self, new_db):
        """Clone table data.

        This method will copy all of the data for a table
        from the old database to the new database on the same server.

        new_db[in]         New database name for the table
        """
        query_str = "INSERT INTO %s.%s SELECT * FROM %s.%s" % \
                    (new_db, self.q_tbl_name, self.q_db_name, self.q_tbl_name)
        if self.verbose and not self.quiet:
            print query_str
        self.server.exec_query(query_str)

    def copy_data(self, destination, cloning=False, new_db=None,
                  connections=1):
        """Retrieve data from a table and copy to another server and database.

        Reads data from a table and inserts the correct INSERT statements into
        the file provided.

        Note: if connections < 1 - retrieve the data one row at-a-time

        destination[in]    Destination server
        cloning[in]        If True, we are copying on the same server
        new_db[in]         Rename the db to this name
        connections[in]    Number of threads(connections) to use for insert
        """

        if new_db is None:
            new_db = self.q_db_name
        else:
            # If need quote new_db identifier with backticks
            if not is_quoted_with_backticks(new_db):
                new_db = quote_with_backticks(new_db)

        num_conn = int(connections)

        if cloning:
            self._clone_data(new_db)
        else:
            # Read and copy the data
            pthreads = []
            for rows in self.retrieve_rows(num_conn):
                p = self.insert_rows(rows, new_db, destination, num_conn > 1)
                if p is not None:
                    p.start()
                    pthreads.append(p)

            if num_conn > 1:
                # Wait for all threads to finish
                for p in pthreads:
                    p.join()

    def retrieve_rows(self, num_conn=1):
        """Retrieve the table data in rows.

        This method can be used to retrieve rows from a table as a generator
        specifying how many rows to retrieve at one time (segment_size is
        calculated based on number of rows / number of connections).

        Note: if num_conn < 1 - retrieve the data one row at-a-time

        num_conn[in]       Number of threads(connections) to use
                           Default = 1 (one large segment)

        Returns (yield) row data
        """

        if num_conn > 1:
            # Only get the segment size when needed.
            segment_size = self.get_segment_size(num_conn)

        # Execute query to get all of the data
        cur = self.server.exec_query("SELECT * FROM {0}".format(self.q_table),
                                     self.query_options)

        while True:
            rows = None
            if num_conn < 1:
                rows = []
                row = cur.fetchone()
                if row is None:
                    raise StopIteration()
                rows.append(row)
                #print "ROWS 1:", rows
            elif num_conn == 1:
                rows = cur.fetchall()
                #print "ROWS 2:", rows
                yield rows
                raise StopIteration()
            else:
                rows = cur.fetchmany(segment_size)
                if not rows:
                    raise StopIteration()
                #print "ROWS 3:", rows
            if rows is None:
                raise StopIteration()
            yield rows

        cur.close()

    def get_dest_values(self, destination=None):
        """Get the destination connection values if not already set.

        destination[in]    Connection values for destination server

        Returns connection values for destination if set or self.server
        """
        # Get connection to database
        if destination is None:
            conn_val = {
                "host": self.server.host,
                "user": self.server.user,
                "passwd": self.server.passwd,
                "unix_socket": self.server.socket,
                "port": self.server.port
            }
        else:
            conn_val = {
                "host": destination.host,
                "user": destination.user,
                "passwd": destination.passwd,
                "unix_socket": destination.socket,
                "port": destination.port
            }
        return conn_val

    def get_tbl_indexes(self):
        """Return a result set containing all indexes for a given table

        Returns result set
        """
        res = self.server.exec_query("SHOW INDEXES FROM %s" % self.q_table)
        return res

    def get_tbl_foreign_keys(self):
        """Return a result set containing all foreign keys for the table

        Returns result set
        """
        res = self.server.exec_query(_FOREIGN_KEY_QUERY % (self.db_name,
                                                           self.tbl_name))
        return res

    @staticmethod
    def __append(indexes, index):
        """Encapsulated append() method to ensure the primary key index
        is placed at the front of the list.
        """

        # Put the primary key first so that it can be compared to all indexes
        if index.name == "PRIMARY":
            indexes.insert(0, index)
        else:
            indexes.append(index)

    @staticmethod
    def __check_index(index, indexes, master_list):
        """Check a single index for duplicate or redundancy against a list
        of other Indexes.

        index[in]          The Index to compare
        indexes[in]        A list of Index instances to compare
        master_list[in]    A list of know duplicate Index instances

        Returns a tuple of whether duplicates are found and if found the
        list of duplicate indexes for this table
        """

        duplicates_found = False
        duplicate_list = []
        if indexes and index:
            for idx in indexes:
                # Don't compare b == a when a == b has already occurred
                if not index.compared and idx.is_duplicate(index):
                    # make sure we haven't already found this match
                    if not idx.column_subparts:
                        idx.compared = True
                    if not (idx in master_list):
                        duplicates_found = True
                        idx.duplicate_of = index
                        duplicate_list.append(idx)
        return (duplicates_found, duplicate_list)

    def __check_index_list(self, indexes):
        """Check a list of Index instances for duplicates.

        indexes[in]        A list of Index instances to compare

        Returns a tuple of whether duplicates are found and if found the
        list of duplicate indexes for this table
        """

        duplicates_found = False
        duplicate_list = []
        # Caller must ensure there are at least 2 elements in the list.
        if len(indexes) < 2:
            return (False, None)
        for index in indexes:
            res = self.__check_index(index, indexes, duplicate_list)
            if res[0]:
                duplicates_found = True
                duplicate_list.extend(res[1])
        return (duplicates_found, duplicate_list)

    def __check_clustered_index_list(self, indexes):
        """ Check for indexes containing the clustered index from the list.

        indexes[in]     list of indexes instances to check.

        Returns the list of indexes that contain the clustered index or
        None (if none found).
        """
        redundant_indexes = []
        if not self.pri_idx:
            self.get_primary_index()
        pri_idx_cols = [col[0] for col in self.pri_idx]
        for index in indexes:
            if index.name == 'PRIMARY':
                # Skip primary key.
                continue
            elif index.contains_columns(pri_idx_cols):
                redundant_indexes.append(index)

        return redundant_indexes if redundant_indexes else []

    def _get_index_list(self):
        """Get the list of indexes for a table.
        Returns list containing indexes.
        """
        rows = self.get_tbl_indexes()
        return rows

    def get_primary_index(self):
        """Retrieve the primary index columns for this table.
        """
        pri_idx = []

        rows = self.server.exec_query("EXPLAIN " + self.q_table)

        # Return False if no indexes found.
        if not rows:
            return pri_idx

        for row in rows:
            if row[3] == 'PRI':
                pri_idx.append(row)

        self.pri_idx = pri_idx

        return pri_idx

    def get_indexes(self):
        """Retrieve the indexes from the server and load them into lists
        based on type.

        Returns True - table has indexes, False - table has no indexes
        """

        self.btree_indexes = []
        self.hash_indexes = []
        self.rtree_indexes = []
        self.fulltext_indexes = []

        if self.verbose:
            print "# Getting indexes for %s" % (self.table)
        rows = self._get_index_list()

        # Return False if no indexes found.
        if not rows:
            return False
        idx = None
        prev_name = ""
        for row in rows:
            if (row[2] != prev_name) or (prev_name == ""):
                prev_name = row[2]
                idx = Index(self.db_name, row)
                if idx.type == "BTREE":
                    self.__append(self.btree_indexes, idx)
                elif idx.type == "HASH":
                    self.__append(self.hash_indexes, idx)
                elif idx.type == "RTREE":
                    self.__append(self.rtree_indexes, idx)
                else:
                    self.__append(self.fulltext_indexes, idx)
            elif idx:
                idx.add_column(row[4], row[7])
        return True

    def check_indexes(self, show_drops=False):
        """Check for duplicate or redundant indexes and display all matches

        show_drops[in]     (optional) If True the DROP statements are printed

        Note: You must call get_indexes() prior to calling this method. If
        get_indexes() is not called, no duplicates will be found.
        """

        dupes = []
        res = self.__check_index_list(self.btree_indexes)
        # if there are duplicates, add them to the dupes list
        if res[0]:
            dupes.extend(res[1])
        res = self.__check_index_list(self.hash_indexes)
        # if there are duplicates, add them to the dupes list
        if res[0]:
            dupes.extend(res[1])
        res = self.__check_index_list(self.rtree_indexes)
        # if there are duplicates, add them to the dupes list
        if res[0]:
            dupes.extend(res[1])
        # We sort the fulltext index columns - easier to do it once here
        for index in self.fulltext_indexes:
            cols = index.columns
            cols.sort(key=lambda cols: cols[0])
        res = self.__check_index_list(self.fulltext_indexes)
        # if there are duplicates, add them to the dupes list
        if res[0]:
            dupes.extend(res[1])

        # Check if secondary keys contains the clustered index (i.e. Primary
        # key). In InnoDB, each record in a secondary index contains the
        # primary key columns. Therefore the use of keys that include the
        # primary key might be redundant.
        redundant_idxs = []
        if not self.storage_engine:
            self.storage_engine = self.get_storage_engine()
        if self.storage_engine == 'INNODB':
            all_indexes = self.btree_indexes
            all_indexes.extend(self.hash_indexes)
            all_indexes.extend(self.rtree_indexes)
            all_indexes.extend(self.fulltext_indexes)
            redundant_idxs = self.__check_clustered_index_list(all_indexes)

        # Print duplicate and redundant keys on composite indexes.
        if len(dupes) > 0:
            plural_1, verb_conj, plural_2 = (
                ('', 'is a', '') if len(dupes) == 1 else ('es', 'are', 's')
            )
            print("# The following index{0} {1} duplicate{2} or redundant "
                  "for table {3}:".format(plural_1, verb_conj, plural_2,
                                          self.table))
            for index in dupes:
                print("#")
                index.print_index_sql()
                print("#     may be redundant or duplicate of:")
                index.duplicate_of.print_index_sql()
            if show_drops:
                print("#\n# DROP statement{0}:\n#".format(plural_2))
                for index in dupes:
                    print("{0};".format(index.get_drop_statement()))
                print("#")

        # Print redundant indexes containing clustered key.
        if redundant_idxs:
            plural, verb_conj, plural_2 = (
                ('', 's', '') if len(redundant_idxs) == 1 else ('es', '', 's')
            )

            print("# The following index{0} for table {1} contain{2} the "
                  "clustered index and might be redundant:".format(plural,
                                                                   self.table,
                                                                   verb_conj))
            for index in redundant_idxs:
                print("#")
                index.print_index_sql()
            if show_drops:
                print("#\n# DROP/ADD statement{0}:\n#".format(plural_2))
                # Get columns from primary key to be removed.
                pri_idx_cols = [col[0] for col in self.pri_idx]
                for index in redundant_idxs:
                    print("{0};".format(
                        index.get_remove_columns_statement(pri_idx_cols)
                    ))
                print("#")

        if not self.quiet and not dupes and not redundant_idxs:
            print("# Table {0} has no duplicate nor redundant "
                  "indexes.".format(self.table))

    def show_special_indexes(self, fmt, limit, best=False):
        """Display a list of the best or worst queries for this table.

        This shows the best (first n) or worst (last n) performing queries
        for a given table.

        fmt[in]            format out output = sql, table, tab, csv
        limit[in]          number to limit the display
        best[in]           (optional) if True, print best performing indexes
                                      if False, print worst performing indexes
        """

        _QUERY = """
            SELECT
                t.TABLE_SCHEMA AS `db`, t.TABLE_NAME AS `table`,
                s.INDEX_NAME AS `index name`, s.COLUMN_NAME AS `field name`,
                s.SEQ_IN_INDEX `seq in index`, s2.max_columns AS `# cols`,
                s.CARDINALITY AS `card`, t.TABLE_ROWS AS `est rows`,
                ROUND(((s.CARDINALITY / IFNULL(
                IF(t.TABLE_ROWS < s.CARDINALITY, s.CARDINALITY, t.TABLE_ROWS),
                0.01)) * 100), 2) AS `sel_percent`
            FROM INFORMATION_SCHEMA.STATISTICS s
                INNER JOIN INFORMATION_SCHEMA.TABLES t
                ON s.TABLE_SCHEMA = t.TABLE_SCHEMA
                AND s.TABLE_NAME = t.TABLE_NAME
            INNER JOIN (
                SELECT TABLE_SCHEMA, TABLE_NAME, INDEX_NAME,
                    MAX(SEQ_IN_INDEX) AS max_columns
                FROM INFORMATION_SCHEMA.STATISTICS
                WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s
                      AND INDEX_NAME != 'PRIMARY'
                GROUP BY TABLE_SCHEMA, TABLE_NAME, INDEX_NAME
             ) AS s2
             ON s.TABLE_SCHEMA = s2.TABLE_SCHEMA
                AND s.TABLE_NAME = s2.TABLE_NAME
                AND s.INDEX_NAME = s2.INDEX_NAME
            WHERE t.TABLE_SCHEMA != 'mysql'
                AND t.TABLE_ROWS > 10 /* Only tables with some rows */
                AND s.CARDINALITY IS NOT NULL
                AND (s.CARDINALITY / IFNULL(
                IF(t.TABLE_ROWS < s.CARDINALITY, s.CARDINALITY, t.TABLE_ROWS),
                0.01)) <= 1.00
            ORDER BY `sel_percent`
        """
        query_options = {
            'params': (self.db_name, self.tbl_name,)
        }
        rows = []
        idx_type = "best"
        if not best:
            idx_type = "worst"
        if best:
            rows = self.server.exec_query(_QUERY + "DESC LIMIT %s" % limit,
                                          query_options)
        else:
            rows = self.server.exec_query(_QUERY + "LIMIT %s" % limit,
                                          query_options)
        if rows:
            print("#")
            if limit == 1:
                print("# Showing the {0} performing index from "
                      "{1}:".format(idx_type, self.table))
            else:
                print("# Showing the top {0} {1} performing indexes from "
                      "{2}:".format(limit, idx_type, self.table))
            print("#")
            cols = ("database", "table", "name", "column", "sequence",
                    "num columns", "cardinality", "est. rows", "percent")
            print_list(sys.stdout, fmt, cols, rows)
        else:
            print("# WARNING: Not enough data to calculate "
                  "best/worst indexes.")

    @staticmethod
    def __print_index_list(indexes, fmt, no_header=False):
        """Print the list of indexes

        indexes[in]        list of indexes to print
        fmt[in]            format out output = sql, table, tab, csv
        no_header[in]      (optional) if True, do not print the header
        """
        if fmt == "sql":
            for index in indexes:
                index.print_index_sql()
        else:
            cols = ("database", "table", "name", "type", "columns")
            rows = []
            for index in indexes:
                rows.append(index.get_row())
            print_list(sys.stdout, fmt, cols, rows, no_header)

    def print_indexes(self, fmt):
        """Print all indexes for this table

        fmt[in]         format out output = sql, table, tab, csv
        """

        print "# Showing indexes from %s:\n#" % (self.table)
        if fmt == "sql":
            self.__print_index_list(self.btree_indexes, fmt)
            self.__print_index_list(self.hash_indexes, fmt, False)
            self.__print_index_list(self.rtree_indexes, fmt, False)
            self.__print_index_list(self.fulltext_indexes, fmt, False)
        else:
            master_indexes = []
            master_indexes.extend(self.btree_indexes)
            master_indexes.extend(self.hash_indexes)
            master_indexes.extend(self.rtree_indexes)
            master_indexes.extend(self.fulltext_indexes)
            self.__print_index_list(master_indexes, fmt)
        print "#"

    def has_primary_key(self):
        """Check to see if there is a primary key.
        Returns bool - True - a primary key was found,
                       False - no primary key.
        """
        primary_key = False
        rows = self._get_index_list()
        for row in rows:
            if row[2] == "PRIMARY":
                primary_key = True
        return primary_key