Skip to content

Instantly share code, notes, and snippets.

@stevekm
Last active September 26, 2016 19:35
Show Gist options
  • Save stevekm/18037838e6f5b9c04ecfbe0baf14026f to your computer and use it in GitHub Desktop.
Save stevekm/18037838e6f5b9c04ecfbe0baf14026f to your computer and use it in GitHub Desktop.
finding files in Python
#!/usr/bin/env python
# python 2.7
# Several ways to find and return the files in a given directory.
# Some of these approaches are fine, some are bad, and some are really bad.
import os
import re
import glob
# Placeholder search parameters: the file-name prefix to look for and the
# directory to search in.
barcode = "my_file_prefix"
run_bam_dir = "/path/to/dir"
# use this one; find the file in the dir with the matching prefix and file extension
res = glob.glob(run_bam_dir + '/{}*.bam'.format(barcode))
# check the length of the glob results; exactly one match is expected, so
# report "nothing found" and "too many found" as separate conditions
if not res:
    print("Warning no files returned")
elif len(res) > 1:
    print("Warning multiple files returned")
# check if file exists; guarded so an empty result cannot raise IndexError,
# and stored so the outcome can actually be used
res_is_file = bool(res) and os.path.isfile(res[0])
# OTHER WAYS:
# using a regex
# re.escape keeps any regex metacharacters in the barcode literal, and
# match() anchors at the start of the name so the barcode is treated as a
# prefix, the same way the glob and startswith() approaches treat it.
# The pattern is compiled once instead of once per directory entry.
bam_pattern = re.compile(r'{}.*\.bam$'.format(re.escape(barcode)))
res = [f for f in os.listdir(run_bam_dir) if bam_pattern.match(f)]
for f in res:
    # os.listdir returns bare names, so join them back onto the directory
    print(os.path.join(run_bam_dir, f))
# using a glob
# Collect every .bam path in the directory, then keep only the ones whose
# file name begins with the barcode.
res = []
for bam_path in glob.glob(run_bam_dir + '/*.bam'):
    if os.path.basename(bam_path).startswith(barcode):
        res.append(bam_path)
# List the directory and print the bare name of each entry that carries
# both the barcode prefix and the .bam extension.
for entry in os.listdir(run_bam_dir):
    has_prefix = entry.startswith(barcode)
    has_extension = entry.endswith(".bam")
    if has_prefix and has_extension:
        print(entry)
# find the bam files
# Walk the whole tree under run_bam_dir and print the joined path of every
# file that starts with the barcode and ends with .bam.
for subdir, dirs, files in os.walk(run_bam_dir):
    for name in files:
        # skip anything that is not a barcode-prefixed .bam file
        if not name.startswith(barcode):
            continue
        if not name.endswith(".bam"):
            continue
        print(os.path.join(subdir, name))
# Debug dump of the directory tree: print what os.walk yields at every
# level, the absolute path of each file, and any .bam files found in the
# immediate subdirectories of that level.
print(run_bam_dir)
for path, subdirs, files in os.walk(run_bam_dir):
    print(path)
    print('\n')
    print(subdirs)
    print('\n')
    print(files)
    print('\n')
    for name in files:
        # Join with the directory being walked before making the path
        # absolute; os.path.abspath(name) on its own resolves the bare name
        # against the current working directory and gives the wrong path
        # when the dir isn't a subdir of pwd.
        print(os.path.abspath(os.path.join(path, name)))
        print('\n')
    # Iterate the subdirectories of the level being visited (subdirs), not
    # a `dirs` variable left over from an earlier loop.
    for d in subdirs:
        print(d)
        for f in glob.iglob(os.path.join(path, d, '*.bam')):
            print(f)
# find the VCF files in the input dir
# Placeholder for the directory to search. It was not defined anywhere in
# this script, so the walk below failed with NameError; point it at the real
# input directory before use.
input_dir = "/path/to/input_dir"
raw_vcf_paths = []
for subdir, dirs, files in os.walk(input_dir):
    for vcf_name in files:
        if vcf_name.endswith('.vcf'):
            raw_vcf_paths.append(os.path.join(subdir, vcf_name))
# sort in place so the collected paths are in lexicographic order
raw_vcf_paths.sort()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment