rs6000/2019_PSE_2_MYSQL.ipynb

## 2019_PSE_2_MYSQL.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-12T13:22:30.799774Z",
     "start_time": "2019-01-12T13:22:30.329252Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "from sqlalchemy import create_engine"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-12T13:22:30.832339Z",
     "start_time": "2019-01-12T13:22:30.806820Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#連線conn\n",
    "engine = create_engine('mysql://資料庫使用者:密碼@localhost/資料庫名稱?charset=utf8',echo = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-12T13:22:31.353049Z",
     "start_time": "2019-01-12T13:22:31.343173Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#工作路徑設定\n",
    "#取得程式目前所在路徑\n",
    "workpath = os.getcwd()\n",
    "#csv的存放路徑\n",
    "csv_path = os.path.join(workpath, \"temp_csv\")\n",
    "csv_file = os.path.join(csv_path, 'two_list.txt')\n",
    "print(csv_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-12T13:22:32.281629Z",
     "start_time": "2019-01-12T13:22:32.266454Z"
    }
   },
   "outputs": [],
   "source": [
    "#將csv清單用pandas讀取\n",
    "csv_list=pd.read_csv(csv_file,sep='\\t',header=None)\n",
    "csv_list.columns = ['name']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-12T13:22:33.222091Z",
     "start_time": "2019-01-12T13:22:33.034119Z"
    },
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "for i in csv_list['name']:\n",
    "    #讀檔\n",
    "    data=pd.read_csv(os.path.join(csv_path, i))\n",
    "    #欄位名稱\n",
    "    data.columns = ['stock_name', 'date', 'open','high','low','close','volume','nfb/s']\n",
    "    #日期格式\n",
    "    data['date']=pd.to_datetime(data['date'], format=\"%m/%d/%Y\")\n",
    "    #股票資料\n",
    "    allstocks = [True if x[0] != '^' else False for x in data['stock_name']]\n",
    "    #市場指數\n",
    "    markets=[True if x[0] == '^' else False for x in data['stock_name']]\n",
    "    new_markets=data[markets]\n",
    "    #清除不要的字元 ^\n",
    "    new_markets['stock_name'] = new_markets['stock_name'].astype(str)\n",
    "    new_markets['stock_name'] = new_markets['stock_name'].str.strip('^')\n",
    "    \n",
    "    #寫入Mysql資料庫 \n",
    "    #檢查資料是否已經存在 \n",
    "    ck_time=data[allstocks]['date'][1]\n",
    "    ck_name=data[allstocks]['stock_name'][1]\n",
    "    \n",
    "    sql='''\n",
    "    SELECT * FROM stocks WHERE stock_name = '{}' AND date = '{}'\n",
    "    '''.format(ck_name,ck_time)\n",
    "    \n",
    "    ck_stock = pd.read_sql(sql, con = engine)\n",
    "    # len*(ck_stock) 等於0 表示資料庫無該筆資料 1:有\n",
    "    if len(ck_stock):\n",
    "        print(\"資料已在資料庫內!!!\",ck_stock)\n",
    "    else:\n",
    "        print(\"資料寫入資料庫!!!\")\n",
    "        data[allstocks].to_sql('stocks',engine,if_exists='append', index=False)\n",
    "        new_markets.to_sql('markets',engine,if_exists='append', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-12T13:17:33.387703Z",
     "start_time": "2019-01-12T13:17:33.379271Z"
    }
   },
   "outputs": [],
   "source": [
    "create_stocks_sql='''\n",
    "CREATE TABLE IF NOT EXISTS `stocks`(\n",
    "    `stock_name` TEXT COLLATE utf8_unicode_ci,\n",
    "    `date` DATETIME DEFAULT NULL,\n",
    "    `open` DOUBLE DEFAULT NULL,\n",
    "    `high` DOUBLE DEFAULT NULL,\n",
    "    `low` DOUBLE DEFAULT NULL,\n",
    "    `close` DOUBLE DEFAULT NULL,\n",
    "    `volume` BIGINT(20) DEFAULT NULL,\n",
    "    `nfb/s` BIGINT(20) DEFAULT NULL\n",
    ")ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;\n",
    "'''\n",
    "create_stocks_markets='''\n",
    "CREATE TABLE  IF NOT EXISTS `markets` (\n",
    "  `stock_name` text COLLATE utf8_unicode_ci,\n",
    "  `date` datetime DEFAULT NULL,\n",
    "  `open` double DEFAULT NULL,\n",
    "  `high` double DEFAULT NULL,\n",
    "  `low` double DEFAULT NULL,\n",
    "  `close` double DEFAULT NULL,\n",
    "  `volume` bigint(20) DEFAULT NULL,\n",
    "  `nfb/s` bigint(20) DEFAULT NULL\n",
    ") ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;\n",
    "'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-12T13:17:34.908128Z",
     "start_time": "2019-01-12T13:17:34.873713Z"
    }
   },
   "outputs": [],
   "source": [
    "#run_sql=pd.read_sql(create_stocks_sql, con = engine)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-01-12T13:22:30.799774Z",
	"start_time": "2019-01-12T13:22:30.329252Z"
	},
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import os\n",
	"from sqlalchemy import create_engine"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-01-12T13:22:30.832339Z",
	"start_time": "2019-01-12T13:22:30.806820Z"
	},
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"#連線conn\n",
	"engine = create_engine('mysql://資料庫使用者:密碼@localhost/資料庫名稱?charset=utf8',echo = False)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-01-12T13:22:31.353049Z",
	"start_time": "2019-01-12T13:22:31.343173Z"
	},
	"scrolled": true
	},
	"outputs": [],
	"source": [
	"#工作路徑設定\n",
	"#取得程式目前所在路徑\n",
	"workpath = os.getcwd()\n",
	"#csv的存放路徑\n",
	"csv_path = os.path.join(workpath, \"temp_csv\")\n",
	"csv_file = os.path.join(csv_path, 'two_list.txt')\n",
	"print(csv_file)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-01-12T13:22:32.281629Z",
	"start_time": "2019-01-12T13:22:32.266454Z"
	}
	},
	"outputs": [],
	"source": [
	"#將csv清單用pandas讀取\n",
	"csv_list=pd.read_csv(csv_file,sep='\\t',header=None)\n",
	"csv_list.columns = ['name']"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-01-12T13:22:33.222091Z",
	"start_time": "2019-01-12T13:22:33.034119Z"
	},
	"scrolled": false
	},
	"outputs": [],
	"source": [
	"for i in csv_list['name']:\n",
	" #讀檔\n",
	" data=pd.read_csv(os.path.join(csv_path, i))\n",
	" #欄位名稱\n",
	" data.columns = ['stock_name', 'date', 'open','high','low','close','volume','nfb/s']\n",
	" #日期格式\n",
	" data['date']=pd.to_datetime(data['date'], format=\"%m/%d/%Y\")\n",
	" #股票資料\n",
	" allstocks = [True if x[0] != '^' else False for x in data['stock_name']]\n",
	" #市場指數\n",
	" markets=[True if x[0] == '^' else False for x in data['stock_name']]\n",
	" new_markets=data[markets]\n",
	" #清除不要的字元 ^\n",
	" new_markets['stock_name'] = new_markets['stock_name'].astype(str)\n",
	" new_markets['stock_name'] = new_markets['stock_name'].str.strip('^')\n",
	" \n",
	" #寫入Mysql資料庫 \n",
	" #檢查資料是否已經存在 \n",
	" ck_time=data[allstocks]['date'][1]\n",
	" ck_name=data[allstocks]['stock_name'][1]\n",
	" \n",
	" sql='''\n",
	" SELECT * FROM stocks WHERE stock_name = '{}' AND date = '{}'\n",
	" '''.format(ck_name,ck_time)\n",
	" \n",
	" ck_stock = pd.read_sql(sql, con = engine)\n",
	" # len*(ck_stock) 等於0 表示資料庫無該筆資料 1:有\n",
	" if len(ck_stock):\n",
	" print(\"資料已在資料庫內!!!\",ck_stock)\n",
	" else:\n",
	" print(\"資料寫入資料庫!!!\")\n",
	" data[allstocks].to_sql('stocks',engine,if_exists='append', index=False)\n",
	" new_markets.to_sql('markets',engine,if_exists='append', index=False)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-01-12T13:17:33.387703Z",
	"start_time": "2019-01-12T13:17:33.379271Z"
	}
	},
	"outputs": [],
	"source": [
	"create_stocks_sql='''\n",
	"CREATE TABLE IF NOT EXISTS `stocks`(\n",
	" `stock_name` TEXT COLLATE utf8_unicode_ci,\n",
	" `date` DATETIME DEFAULT NULL,\n",
	" `open` DOUBLE DEFAULT NULL,\n",
	" `high` DOUBLE DEFAULT NULL,\n",
	" `low` DOUBLE DEFAULT NULL,\n",
	" `close` DOUBLE DEFAULT NULL,\n",
	" `volume` BIGINT(20) DEFAULT NULL,\n",
	" `nfb/s` BIGINT(20) DEFAULT NULL\n",
	")ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;\n",
	"'''\n",
	"create_stocks_markets='''\n",
	"CREATE TABLE IF NOT EXISTS `markets` (\n",
	" `stock_name` text COLLATE utf8_unicode_ci,\n",
	" `date` datetime DEFAULT NULL,\n",
	" `open` double DEFAULT NULL,\n",
	" `high` double DEFAULT NULL,\n",
	" `low` double DEFAULT NULL,\n",
	" `close` double DEFAULT NULL,\n",
	" `volume` bigint(20) DEFAULT NULL,\n",
	" `nfb/s` bigint(20) DEFAULT NULL\n",
	") ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;\n",
	"'''"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"ExecuteTime": {
	"end_time": "2019-01-12T13:17:34.908128Z",
	"start_time": "2019-01-12T13:17:34.873713Z"
	}
	},
	"outputs": [],
	"source": [
	"#run_sql=pd.read_sql(create_stocks_sql, con = engine)"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.5"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}