Created
April 15, 2016 12:49
-
-
Save pybokeh/097ccc16c0e977ba85d3e8c533f47563 to your computer and use it in GitHub Desktop.
concatenating multiple text files into one file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"<center><h1>Concatenating Multiple Text Files Into One File</h1></center>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Obtaining list of files in a directory" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"from os import listdir\n", | |
"from os.path import isfile, join\n", | |
"\n", | |
"mypath = r'D:\\temp\\BobV'\n", | |
"\n", | |
"onlyfiles = [join(mypath,f) for f in listdir(mypath) if isfile(join(mypath, f))]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['D:\\\\temp\\\\BobV\\\\file1.txt', 'D:\\\\temp\\\\BobV\\\\file2.txt']" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"onlyfiles" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Contents of file1.txt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'mary had a little lamb.'" | |
] | |
}, | |
"execution_count": 24, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"open(r'D:\\temp\\BobV\\file1.txt').read()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"### Contents of file2.txt" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'the quick brown fox jumped over the lazy dog.'" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"open(r'D:\\temp\\BobV\\file2.txt').read()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## For large files" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"with open(r'D:\\temp\\BobV\\concatenated_file.txt', 'w') as outfile:\n", | |
" for fname in onlyfiles:\n", | |
" with open(fname) as infile:\n", | |
" for line in infile:\n", | |
" outfile.write(' ' + line) # add a space in front to ensure we don't concatenate 2 words together" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## For small files" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"with open(r'D:\\temp\\BobV\\concatenated_file.txt', 'w') as outfile:\n", | |
" for fname in onlyfiles:\n", | |
" with open(fname) as infile:\n", | |
" outfile.write(' ') # add a blank space in front to ensure we don't concatenate 2 words together\n", | |
" outfile.write(infile.read())" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Now, let's view the concatenated text file" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"bobs_musings = open(r'D:\\temp\\BobV\\concatenated_file.txt')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 27, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
" mary had a little lamb. the quick brown fox jumped over the lazy dog.\n" | |
] | |
} | |
], | |
"source": [ | |
"print(bobs_musings.read())" | |
] | |
} | |
], | |
"metadata": { | |
"hide_input": false, | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.4" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment