Created
December 3, 2019 03:40
-
-
Save hi5san/b5fd861bfe5b20d49a002313de1ce9bf to your computer and use it in GitHub Desktop.
IBM Cloud DB2 to DB2 data copy script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "# Migration of IBM Cloud DB2 to new DB2 instance\n\n## Imports and pre-reqs." | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "import ibm_db\nimport ibm_db_dbi\nimport pandas" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# sql magic\n!pip install --user ipython-sql\n!pip install --user ibm_db_sa\n%load_ext sql" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Tools" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "def prepDBConfig(hostname, uid, pwd, port=\"50000\"):\n    '''Build a DB2 connection-config dict.\n\n    driver/database/protocol are fixed for IBM Cloud DB2 (BLUDB);\n    port defaults to 50000 but can now be overridden per instance.\n    '''\n    return {\n        'driver': \"{IBM DB2 ODBC DRIVER}\",\n        'database': \"BLUDB\",\n        'port': port,\n        'protocol': \"TCPIP\",\n        'hostname': hostname,\n        'uid': uid,\n        'pwd': pwd,\n    }\n\ndef prepDSN(config):\n    '''Render a dict from prepDBConfig() into an ODBC DSN string.'''\n    # Field order matches the original hand-built template.\n    return (\n        \"DRIVER={driver};\"\n        \"DATABASE={database};\"\n        \"HOSTNAME={hostname};\"\n        \"PORT={port};\"\n        \"PROTOCOL={protocol};\"\n        \"UID={uid};\"\n        \"PWD={pwd};\").format(**config)\n\n#cfg = prepDBConfig('one', 'two', 'three')\n#mdsn = prepDSN(cfg)\n#print(mdsn)"
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "def connectDB(dsn, dbconfig):\n    '''Create a database connection and print server/client metadata.\n\n    dsn: ODBC DSN string (see prepDSN); dbconfig: dict (see prepDBConfig),\n    used here only for logging. Returns the ibm_db connection handle;\n    re-raises on failure after printing the driver error message.\n    '''\n    try:\n        conn = ibm_db.connect(dsn, \"\", \"\")\n        print (\"Connected to database: \", dbconfig['database'],\n               \"as user: \", dbconfig['uid'],\n               \"on host: \", dbconfig['hostname'])\n\n        # Retrieve metadata for the database server\n        server = ibm_db.server_info(conn)\n        print (\"== Server info ==\")\n        print (\"DBMS_NAME: \", server.DBMS_NAME)\n        print (\"DBMS_VER: \", server.DBMS_VER)\n        print (\"DB_NAME: \", server.DB_NAME)\n\n        # Retrieve metadata for the database client / driver\n        client = ibm_db.client_info(conn)\n        print (\"== Client info ==\")\n        print (\"DRIVER_NAME: \", client.DRIVER_NAME)\n        print (\"DRIVER_VER: \", client.DRIVER_VER)\n        print (\"DATA_SOURCE_NAME: \", client.DATA_SOURCE_NAME)\n        print (\"DRIVER_ODBC_VER: \", client.DRIVER_ODBC_VER)\n        print (\"ODBC_VER: \", client.ODBC_VER)\n        print (\"ODBC_SQL_CONFORMANCE: \", client.ODBC_SQL_CONFORMANCE)\n        print (\"APPL_CODEPAGE: \", client.APPL_CODEPAGE)\n        print (\"CONN_CODEPAGE: \", client.CONN_CODEPAGE)\n        return conn\n    except Exception:\n        # Was a bare `except:` -- narrowed so KeyboardInterrupt/SystemExit\n        # are not intercepted; the driver error is printed, then re-raised.\n        print (\"Unable to connect: \", ibm_db.conn_errormsg())\n        raise\n\n#src_dbconn = connectDB(src_dsn, src_dbconfig)\n"
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "# Setup Source DB connection" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": "#Replace the placeholder values with your actual Db2 hostname, username, and password:\ndsn_hostname = \"<Source DB2 URL>\" # e.g.: \"dashdb-txn-sbox-yp-dalNN-MM.services.dal.bluemix.net\"\ndsn_uid = \"<Source DB2 USERID>\" # e.g. \"abc12345\"\ndsn_pwd = \"<Source DB2 PASSWORD>\" # e.g. \"7dBZ3wWt9XN6$o0J\"" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "src_dbconfig = prepDBConfig(dsn_hostname, dsn_uid, dsn_pwd)\nsrc_dsn = prepDSN(src_dbconfig)\nsrc_dbconn = connectDB(src_dsn, src_dbconfig)\nsrc_pconn = ibm_db_dbi.Connection(src_dbconn)\nprint(src_pconn)" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "%sql ibm_db_sa://<Source DB2 USERID>:<Source DB2 PASSWORD>@<Source DB2 URL>:50000/BLUDB" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Setup Target DB connection" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "#Replace the placeholder values with your actual Db2 hostname, username, and password:\ndsn_hostname = \"<Destination DB2 URL>\" # e.g.: \"dashdb-txn-sbox-yp-dalNN-MM.services.dal.bluemix.net\"\ndsn_uid = \"<Destination DB2 USERID>\" # e.g. \"abc12345\"\ndsn_pwd = \"<Destination DB2 PASSWORD>\" # e.g. \"7dBZ3wWt9XN6$o0J\"" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "dst_dbconfig = prepDBConfig(dsn_hostname, dsn_uid, dsn_pwd)\ndst_dsn = prepDSN(dst_dbconfig)\ndst_dbconn = connectDB(dst_dsn, dst_dbconfig)\ndst_pconn = ibm_db_dbi.Connection(dst_dbconn)\nprint(dst_pconn)" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "%sql ibm_db_sa://<Destination DB2 USERID>:<Destination DB2 PASSWORD>@<Destination DB2 URL>:50000/BLUDB" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Setup target DB" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "raise BaseException(\"We already migrated the DB on Nov. 28, 2019. Do NOT run sql update statements! Proceed only if you know what you're doing.\")" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "%%sql <Destination DB2 USERID>@BLUDB\nCREATE TABLE \"<Destination DB2 USERID>\".\"<Destination DB2 TABLE>\" (\n\t\t \"ID\" INTEGER NOT NULL GENERATED BY DEFAULT AS IDENTITY ( \n\t\t START WITH +1 \n\t\t INCREMENT BY +1 \n\t\t MINVALUE +1 \n\t\t MAXVALUE +2147483647 \n\t\t NO CYCLE \n\t\t CACHE 10 \n\t\t ORDER ), \n\t\t \"UUID\" VARCHAR(128 OCTETS) NOT NULL , \n\t\t \"ADMIN_UUID\" VARCHAR(128 OCTETS) NOT NULL , \n\t\t \"NAME\" VARCHAR(128 OCTETS) NOT NULL , \n\t\t \"DESCRIPTION\" VARCHAR(1024 OCTETS) NOT NULL , \n\t\t \"EMAIL\" VARCHAR(128 OCTETS) NOT NULL ) \n\t\t ORGANIZE BY ROW;" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "%%sql <Destination DB2 USERID>@BLUDB\nALTER TABLE \"<Destination DB2 USERID>\".\"<Destination DB2 TABLE>\"\n\tADD PRIMARY KEY\n\t\t(\"ID\");" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Copy data from src to dst" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "raise BaseException(\"We already migrated the DB on Nov. 28, 2019. Do NOT run sql update statements! Proceed only if you know what you're doing.\")" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "### Setup utilities" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "from ibmdbpy import IdaDataBase, IdaDataFrame" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# dst_dbconfig is prepared above\ndst_dash_dsn = 'DASHDB;Database={};Hostname={};Port={};PROTOCOL=TCPIP;UID={};PWD={}'.format(\n dst_dbconfig['database'],\n dst_dbconfig['hostname'],\n dst_dbconfig['port'],\n dst_dbconfig['uid'],\n dst_dbconfig['pwd']\n)\n\n#print(dst_dash_dsn)\ndst_idadb = IdaDataBase(dst_dash_dsn)" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# Read all rows of a source table into one pandas DataFrame.\n# pandas.read_sql may not return everything at once, so read in chunks.\ndef getAllDataAsDataFrame(tablename, con, chunksize=5000):\n    '''Return the full contents of `tablename` (via DBI connection `con`) as a DataFrame.'''\n    selectQuery = \"select * from \" + tablename\n    src_dfl = []\n    for chunk in pandas.read_sql(selectQuery, con=con, chunksize=chunksize):\n        src_dfl.append(chunk)\n    if src_dfl:\n        src_dfs = pandas.concat(src_dfl, ignore_index=True)\n    else:\n        # Empty table: keep the return type consistent.\n        src_dfs = pandas.DataFrame()\n    display(src_dfs.shape)\n    return src_dfs\n\n# Append a pandas DataFrame to a table in the ibmdbpy destination DB.\ndef copyAllDataToDstDb(dst_idadb, src_dfs, tablename, commit=False, close=False):\n    '''Append src_dfs rows to `tablename`; optionally commit and/or close the DB.'''\n    dst_ida_df = IdaDataFrame(dst_idadb, tablename)\n    dst_idadb.append(dst_ida_df, src_dfs)\n    display(dst_ida_df.shape)\n    if commit:\n        dst_idadb.commit()  # bug fix: was `dst_idadb.commit` (attribute access, never committed)\n    if close:\n        dst_idadb.close()\n    return dst_ida_df"
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "### Extract data from src DB" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# NOTE(review): the table placeholder says \"Destination\" while reading from the\n# *source* connection (src_pconn) -- presumably the table name is identical on\n# both instances; confirm before running.\nsrc_event_dfs=getAllDataAsDataFrame('<Destination DB2 TABLE>', src_pconn)"
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "#print(src_user_dfs.shape)\n#src_user_dfs.sort_values(by='ID',ascending=False).head()" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "### Copy data to target DB" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# Safety guard: this migration already ran on Nov. 28, 2019.\n# In the original cell the copy statement sat *after* an unconditional raise and\n# was therefore unreachable even when run deliberately; the flag below keeps the\n# guard (still raises by default) but makes the copy runnable on purpose.\nCONFIRM_MIGRATION = False  # set True only if you really intend to re-copy the data\nif not CONFIRM_MIGRATION:\n    raise BaseException(\"We already migrated the DB on Nov. 28, 2019. Do NOT run sql update statements! Proceed only if you know what you're doing.\")\ndst_event_ida_df = copyAllDataToDstDb(dst_idadb, src_event_dfs, '<Destination DB2 TABLE>')"
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# Commit and close to reflect changes\ndst_idadb.commit()\ndst_idadb.close()" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "#tmp_pdf = dst_ida_df.as_dataframe()\n#tmp_pdf.shape\n#tmp_pdf.tail\n#tmp_pdf.sort_values(by='ID',ascending=False).head()" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "### Check result DB" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "dst_event_dfs=getAllDataAsDataFrame('<Destination DB2 TABLE>', dst_pconn)" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Result check - Num of entries for source and target DB (they should match)" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": "%%sql <Source DB2 TABLE>@BLUDB\nselect \n (select count(*) from <Source DB2 TABLE>) as <Source DB2 TABLE>\nfrom SYSCAT.COLUMNS\nFETCH FIRST 1 ROWS ONLY " | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": "%%sql <Destination DB2 TABLE>@BLUDB\nselect \n (select count(*) from <Destination DB2 TABLE>) as <Destination DB2 TABLE>\nfrom SYSCAT.COLUMNS\nFETCH FIRST 1 ROWS ONLY " | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Adjust initial ID (by using SQL restart)\nThis adjustment is needed, since initially, \nthe new ID sequence will start from 1, and will clash with existing data.\n\n### Strategy\nWe find the maximum ID for each table, add some buffer (1000) and use that as restart sequence ID." | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "#### Find max IDs and value of restart Id" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "ID_RESTART_BUF = 1000\n\ndef calcRestartId(idmax):\n    '''Round idmax up to the next multiple of ID_RESTART_BUF (always strictly greater),\n    leaving a buffer so the restarted identity sequence cannot clash with existing IDs.'''\n    newidmax = (idmax // ID_RESTART_BUF + 1) * ID_RESTART_BUF\n    print('idmax=%d -> restart id=%d' % (idmax, newidmax))\n    return newidmax"
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "dst_event_idmax = calcRestartId(dst_event_dfs.iloc[dst_event_dfs['ID'].idxmax()]['ID'])\ndisplay(dst_event_dfs.sort_values(by='ID')['ID'].tail(2))" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "## Alter restart for column ID" | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "ibm_db.exec_immediate(dst_dbconn, 'ALTER TABLE \"<Destination DB2 USERID>\".\"<Destination DB2 TABLE>\" ALTER COLUMN \"ID\" RESTART WITH ' + str(dst_event_idmax))" | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": "# All Done!\nPlease, add an event, add an entry, make sure the IDs are restarted correctly." | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": "# for checks\ndst_event_dfs=getAllDataAsDataFrame('<Destination DB2 TABLE>', dst_pconn)\n\ndisplay(dst_event_dfs.sort_values(by='ID')['ID'].tail(10))" | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.6", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment