Created
December 3, 2019 03:40
-
-
Save hi5san/b5fd861bfe5b20d49a002313de1ce9bf to your computer and use it in GitHub Desktop.
IBM Cloud DB2 to DB2 data copy script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "# Migration of IBM Cloud DB2 to new DB2 instance\n\n## Imports and pre-reqs." | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "import ibm_db\nimport ibm_db_dbi\nimport pandas" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# sql magic\n!pip install --user ipython-sql\n!pip install --user ibm_db_sa\n%load_ext sql" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Tools" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "def prepDBConfig(hostname, uid, pwd, port=\"50000\"):\n    '''Build a DB2 connection-config dict.\n\n    driver/database/protocol are fixed for IBM Cloud DB2 (BLUDB);\n    port defaults to 50000 but can now be overridden per instance.\n    '''\n    return {\n        'driver': \"{IBM DB2 ODBC DRIVER}\",\n        'database': \"BLUDB\",\n        'port': port,\n        'protocol': \"TCPIP\",\n        'hostname': hostname,\n        'uid': uid,\n        'pwd': pwd,\n    }\n\ndef prepDSN(config):\n    '''Render a dict from prepDBConfig() into an ODBC DSN string.'''\n    # Field order matches the original hand-built template.\n    return (\n        \"DRIVER={driver};\"\n        \"DATABASE={database};\"\n        \"HOSTNAME={hostname};\"\n        \"PORT={port};\"\n        \"PROTOCOL={protocol};\"\n        \"UID={uid};\"\n        \"PWD={pwd};\").format(**config)\n\n#cfg = prepDBConfig('one', 'two', 'three')\n#mdsn = prepDSN(cfg)\n#print(mdsn)"
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "def connectDB(dsn, dbconfig):\n    '''Create a database connection and print server/client metadata.\n\n    dsn: ODBC DSN string (see prepDSN); dbconfig: dict (see prepDBConfig),\n    used here only for logging. Returns the ibm_db connection handle;\n    re-raises on failure after printing the driver error message.\n    '''\n    try:\n        conn = ibm_db.connect(dsn, \"\", \"\")\n        print (\"Connected to database: \", dbconfig['database'],\n               \"as user: \", dbconfig['uid'],\n               \"on host: \", dbconfig['hostname'])\n\n        # Retrieve metadata for the database server\n        server = ibm_db.server_info(conn)\n        print (\"== Server info ==\")\n        print (\"DBMS_NAME: \", server.DBMS_NAME)\n        print (\"DBMS_VER: \", server.DBMS_VER)\n        print (\"DB_NAME: \", server.DB_NAME)\n\n        # Retrieve metadata for the database client / driver\n        client = ibm_db.client_info(conn)\n        print (\"== Client info ==\")\n        print (\"DRIVER_NAME: \", client.DRIVER_NAME)\n        print (\"DRIVER_VER: \", client.DRIVER_VER)\n        print (\"DATA_SOURCE_NAME: \", client.DATA_SOURCE_NAME)\n        print (\"DRIVER_ODBC_VER: \", client.DRIVER_ODBC_VER)\n        print (\"ODBC_VER: \", client.ODBC_VER)\n        print (\"ODBC_SQL_CONFORMANCE: \", client.ODBC_SQL_CONFORMANCE)\n        print (\"APPL_CODEPAGE: \", client.APPL_CODEPAGE)\n        print (\"CONN_CODEPAGE: \", client.CONN_CODEPAGE)\n        return conn\n    except Exception:\n        # Was a bare `except:` -- narrowed so KeyboardInterrupt/SystemExit\n        # are not intercepted; the driver error is printed, then re-raised.\n        print (\"Unable to connect: \", ibm_db.conn_errormsg())\n        raise\n\n#src_dbconn = connectDB(src_dsn, src_dbconfig)\n"
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "# Setup Source DB connection" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": "#Replace the placeholder values with your actual Db2 hostname, username, and password:\ndsn_hostname = \"<Source DB2 URL>\" # e.g.: \"dashdb-txn-sbox-yp-dalNN-MM.services.dal.bluemix.net\"\ndsn_uid = \"<Source DB2 USERID>\" # e.g. \"abc12345\"\ndsn_pwd = \"<Source DB2 PASSWORD>\" # e.g. \"7dBZ3wWt9XN6$o0J\"" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "src_dbconfig = prepDBConfig(dsn_hostname, dsn_uid, dsn_pwd)\nsrc_dsn = prepDSN(src_dbconfig)\nsrc_dbconn = connectDB(src_dsn, src_dbconfig)\nsrc_pconn = ibm_db_dbi.Connection(src_dbconn)\nprint(src_pconn)" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "%sql ibm_db_sa://<Source DB2 USERID>:<Source DB2 PASSWORD>@<Source DB2 URL>:50000/BLUDB" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Setup Target DB connection" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "#Replace the placeholder values with your actual Db2 hostname, username, and password:\ndsn_hostname = \"<Destination DB2 URL>\" # e.g.: \"dashdb-txn-sbox-yp-dalNN-MM.services.dal.bluemix.net\"\ndsn_uid = \"<Destination DB2 USERID>\" # e.g. \"abc12345\"\ndsn_pwd = \"<Destination DB2 PASSWORD>\" # e.g. \"7dBZ3wWt9XN6$o0J\"" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "dst_dbconfig = prepDBConfig(dsn_hostname, dsn_uid, dsn_pwd)\ndst_dsn = prepDSN(dst_dbconfig)\ndst_dbconn = connectDB(dst_dsn, dst_dbconfig)\ndst_pconn = ibm_db_dbi.Connection(dst_dbconn)\nprint(dst_pconn)" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "%sql ibm_db_sa://<Destination DB2 USERID>:<Destination DB2 PASSWORD>@<Destination DB2 URL>:50000/BLUDB" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Setup target DB" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "raise BaseException(\"We already migrated the DB on Nov. 28, 2019. Do NOT run sql update statements! Proceed only if you know what you're doing.\")" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "%%sql <Destination DB2 USERID>@BLUDB\nCREATE TABLE \"<Destination DB2 USERID>\".\"<Destination DB2 TABLE>\" (\n\t\t \"ID\" INTEGER NOT NULL GENERATED BY DEFAULT AS IDENTITY ( \n\t\t START WITH +1 \n\t\t INCREMENT BY +1 \n\t\t MINVALUE +1 \n\t\t MAXVALUE +2147483647 \n\t\t NO CYCLE \n\t\t CACHE 10 \n\t\t ORDER ), \n\t\t \"UUID\" VARCHAR(128 OCTETS) NOT NULL , \n\t\t \"ADMIN_UUID\" VARCHAR(128 OCTETS) NOT NULL , \n\t\t \"NAME\" VARCHAR(128 OCTETS) NOT NULL , \n\t\t \"DESCRIPTION\" VARCHAR(1024 OCTETS) NOT NULL , \n\t\t \"EMAIL\" VARCHAR(128 OCTETS) NOT NULL ) \n\t\t ORGANIZE BY ROW;" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "%%sql <Destination DB2 USERID>@BLUDB\nALTER TABLE \"<Destination DB2 USERID>\".\"<Destination DB2 TABLE>\"\n\tADD PRIMARY KEY\n\t\t(\"ID\");" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Copy data from src to dst" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "raise BaseException(\"We already migrated the DB on Nov. 28, 2019. Do NOT run sql update statements! Proceed only if you know what you're doing.\")" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "### Setup utilities" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "from ibmdbpy import IdaDataBase, IdaDataFrame" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# dst_dbconfig is prepared above\ndst_dash_dsn = 'DASHDB;Database={};Hostname={};Port={};PROTOCOL=TCPIP;UID={};PWD={}'.format(\n dst_dbconfig['database'],\n dst_dbconfig['hostname'],\n dst_dbconfig['port'],\n dst_dbconfig['uid'],\n dst_dbconfig['pwd']\n)\n\n#print(dst_dash_dsn)\ndst_idadb = IdaDataBase(dst_dash_dsn)" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# Read all rows of a source table into one pandas DataFrame.\n# pandas.read_sql may not return everything at once, so read in chunks.\ndef getAllDataAsDataFrame(tablename, con, chunksize=5000):\n    '''Return the full contents of `tablename` (via DBI connection `con`) as a DataFrame.'''\n    selectQuery = \"select * from \" + tablename\n    src_dfl = []\n    for chunk in pandas.read_sql(selectQuery, con=con, chunksize=chunksize):\n        src_dfl.append(chunk)\n    if src_dfl:\n        src_dfs = pandas.concat(src_dfl, ignore_index=True)\n    else:\n        # Empty table: keep the return type consistent.\n        src_dfs = pandas.DataFrame()\n    display(src_dfs.shape)\n    return src_dfs\n\n# Append a pandas DataFrame to a table in the ibmdbpy destination DB.\ndef copyAllDataToDstDb(dst_idadb, src_dfs, tablename, commit=False, close=False):\n    '''Append src_dfs rows to `tablename`; optionally commit and/or close the DB.'''\n    dst_ida_df = IdaDataFrame(dst_idadb, tablename)\n    dst_idadb.append(dst_ida_df, src_dfs)\n    display(dst_ida_df.shape)\n    if commit:\n        dst_idadb.commit()  # bug fix: was `dst_idadb.commit` (attribute access, never committed)\n    if close:\n        dst_idadb.close()\n    return dst_ida_df"
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "### Extract data from src DB" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# NOTE(review): the table placeholder says \"Destination\" while reading from the\n# *source* connection (src_pconn) -- presumably the table name is identical on\n# both instances; confirm before running.\nsrc_event_dfs=getAllDataAsDataFrame('<Destination DB2 TABLE>', src_pconn)"
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "#print(src_user_dfs.shape)\n#src_user_dfs.sort_values(by='ID',ascending=False).head()" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "### Copy data to target DB" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# Safety guard: this migration already ran on Nov. 28, 2019.\n# In the original cell the copy statement sat *after* an unconditional raise and\n# was therefore unreachable even when run deliberately; the flag below keeps the\n# guard (still raises by default) but makes the copy runnable on purpose.\nCONFIRM_MIGRATION = False  # set True only if you really intend to re-copy the data\nif not CONFIRM_MIGRATION:\n    raise BaseException(\"We already migrated the DB on Nov. 28, 2019. Do NOT run sql update statements! Proceed only if you know what you're doing.\")\ndst_event_ida_df = copyAllDataToDstDb(dst_idadb, src_event_dfs, '<Destination DB2 TABLE>')"
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# Commit and close to reflect changes\ndst_idadb.commit()\ndst_idadb.close()" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "#tmp_pdf = dst_ida_df.as_dataframe()\n#tmp_pdf.shape\n#tmp_pdf.tail\n#tmp_pdf.sort_values(by='ID',ascending=False).head()" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "### Check result DB" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "dst_event_dfs=getAllDataAsDataFrame('<Destination DB2 TABLE>', dst_pconn)" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Result check - Num of entries for source and target DB (they should match)" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": "%%sql <Source DB2 TABLE>@BLUDB\nselect \n (select count(*) from <Source DB2 TABLE>) as <Source DB2 TABLE>\nfrom SYSCAT.COLUMNS\nFETCH FIRST 1 ROWS ONLY " | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": "%%sql <Destination DB2 TABLE>@BLUDB\nselect \n (select count(*) from <Destination DB2 TABLE>) as <Destination DB2 TABLE>\nfrom SYSCAT.COLUMNS\nFETCH FIRST 1 ROWS ONLY " | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Adjust initial ID (by using SQL restart)\nThis adjustment is needed, since initially, \nthe new ID sequence will start from 1, and will clash with existing data.\n\n### Strategy\nWe find the maximum ID for each table, add some buffer (1000) and use that as restart sequence ID." | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "#### Find max IDs and value of restart Id" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "ID_RESTART_BUF = 1000\n\ndef calcRestartId(idmax):\n    '''Round idmax up to the next multiple of ID_RESTART_BUF (always strictly greater),\n    leaving a buffer so the restarted identity sequence cannot clash with existing IDs.'''\n    newidmax = (idmax // ID_RESTART_BUF + 1) * ID_RESTART_BUF\n    print('idmax=%d -> restart id=%d' % (idmax, newidmax))\n    return newidmax"
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "dst_event_idmax = calcRestartId(dst_event_dfs.iloc[dst_event_dfs['ID'].idxmax()]['ID'])\ndisplay(dst_event_dfs.sort_values(by='ID')['ID'].tail(2))" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Alter restart for column ID" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "ibm_db.exec_immediate(dst_dbconn, 'ALTER TABLE \"<Destination DB2 USERID>\".\"<Destination DB2 TABLE>\" ALTER COLUMN \"ID\" RESTART WITH ' + str(dst_event_idmax))" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "# All Done!\nPlease, add an event, add an entry, make sure the IDs are restarted correctly." | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# for checks\ndst_event_dfs=getAllDataAsDataFrame('<Destination DB2 TABLE>', dst_pconn)\n\ndisplay(dst_event_dfs.sort_values(by='ID')['ID'].tail(10))" | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.6", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment