Created
September 6, 2017 20:45
-
-
Save dmofot/f4f0573d4d418fbf4b9b863e731e05ba to your computer and use it in GitHub Desktop.
multiple-csv-merge.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 31, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# imports\n", | |
"import os\n", | |
"import glob\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 32, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# create three sample dataframes that share the 'letter' and 'number' columns (df3 adds 'animal')\n", | |
"df1 = pd.DataFrame([['a', 1], ['b', 2]], columns=['letter', 'number'])\n", | |
"df2 = pd.DataFrame([['c', 3], ['d', 4]], columns=['letter', 'number'])\n", | |
"df3 = pd.DataFrame([['c', 3, 'cat'], ['d', 4, 'dog']], columns=['letter', 'number', 'animal'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 33, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# save dataframes to csv\n", | |
"df1.to_csv('df1.csv', index=False)\n", | |
"df2.to_csv('df2.csv', index=False)\n", | |
"df3.to_csv('df3.csv', index=False)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"['./df1.csv', './df2.csv', './df3.csv']\n" | |
] | |
} | |
], | |
"source": [ | |
"# set the search path and collect every *.csv file found directly inside it\n", | |
"path = r'.'\n", | |
"all_files = glob.glob(os.path.join(path, \"*.csv\"))\n", | |
"print(all_files)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" number number number animal\n", | |
"a 1.0 NaN NaN NaN\n", | |
"b 2.0 NaN NaN NaN\n", | |
"c NaN 3.0 3.0 cat\n", | |
"d NaN 4.0 4.0 dog\n" | |
] | |
} | |
], | |
"source": [ | |
"# read each csv file with 'letter' as the index, then concatenate column-wise (axis=1) so rows align on that index\n", | |
"df_from_each_file = (pd.read_csv(f, index_col='letter') for f in all_files)\n", | |
"concatenated_df = pd.concat(df_from_each_file, axis=1)\n", | |
"print(concatenated_df)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3.6", | |
"language": "python", | |
"name": "python36" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment