89465127/open_hadoop.py

## open_hadoop.py
import glob
import os

def filelist(path, _filter="part-*"):
    """Return the sorted paths inside directory *path* that match *_filter*.

    *path* has ``~`` expanded and is made absolute before matching, so the
    returned entries are absolute paths.  *_filter* is a ``glob`` pattern
    applied directly inside that directory; the default ``"part-*"``
    selects names that begin with ``part-``.

    Returns a list of path strings; it is empty when nothing matches or
    the directory does not exist.
    """
    basepath = os.path.abspath(os.path.expanduser(path))
    # glob.glob() makes no ordering guarantee; sort so callers always see
    # the files in the same, name-based order (part-00000, part-00001, ...).
    return sorted(glob.glob(os.path.join(basepath, _filter)))

def hfile(path, _filter="part-*"):
    """Generate every line of every file that ``filelist`` finds for *path*.

    Files are visited in the order ``filelist(path, _filter)`` returns
    them.  Each one is opened in the default text mode and closed again
    once it has been read through.  Lines are yielded exactly as file
    iteration produces them, line endings included.
    """
    matches = filelist(path, _filter)
    for part_path in matches:
        with open(part_path) as part:
            for text in part:
                yield text


''' Usage example:

from open_hadoop import hfile

for line in hfile('./input/path/'):
    print(line)

'''


''' Installation:

- Place open_hadoop.py in your site-packages directory.
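- Your site-packages directory can be located by running:
python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
- On Python 3.12+, where distutils is no longer available, run instead:
python -c "import sysconfig; print(sysconfig.get_path('purelib'))"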

'''
	import glob
	import os

	def filelist(path, _filter="part-*"):
	    """Return the sorted paths inside directory *path* that match *_filter*.

	    *path* has ``~`` expanded and is made absolute before matching, so
	    the returned entries are absolute paths.  *_filter* is a ``glob``
	    pattern applied directly inside that directory; the default
	    ``"part-*"`` selects names that begin with ``part-``.

	    Returns a list of path strings; it is empty when nothing matches
	    or the directory does not exist.
	    """
	    basepath = os.path.abspath(os.path.expanduser(path))
	    # glob.glob() makes no ordering guarantee; sort so callers always
	    # see the files in the same, name-based order.
	    return sorted(glob.glob(os.path.join(basepath, _filter)))

	def hfile(path, _filter="part-*"):
	    """Yield, one at a time, the lines of each file matched in *path*.

	    The files come from ``filelist(path, _filter)`` and are read in the
	    order that call returns them.  Each file is opened in the default
	    text mode and closed by the ``with`` block before the next one is
	    opened.  Lines keep their trailing line ending.
	    """
	    for filename in filelist(path, _filter):
	        with open(filename) as f:
	            for line in f:
	                yield line


	''' Usage example:

	from open_hadoop import hfile

	for line in hfile('./input/path/'):
	    print(line)

	'''


	''' Installation:

	- Place open_hadoop.py in your site-packages directory.
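	- Your site-packages directory can be located by running:
	python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())"
	- On Python 3.12+, where distutils is no longer available, run instead:
	python -c "import sysconfig; print(sysconfig.get_path('purelib'))"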

	'''