Last active
September 26, 2016 19:35
-
-
Save stevekm/18037838e6f5b9c04ecfbe0baf14026f to your computer and use it in GitHub Desktop.
finding files in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# python 2.7
# various ways to try to find and return the files in a given directory
# some of these are ok, some are bad, some are real real bad
import os
import re
import glob

# Prefix that the wanted file names start with.
barcode = "my_file_prefix"
# Directory that is searched for the files.
run_bam_dir = "/path/to/dir"
# use this one; find the file in the dir with the matching prefix and file extension
res = glob.glob(os.path.join(run_bam_dir, '{}*.bam'.format(barcode)))
# check the length of the glob results; zero matches and several matches are
# different problems, so report them separately
if not res:
    print("Warning no files returned")
elif len(res) > 1:
    print("Warning multiple files returned")
# check if file exists; only index into res when there is a match, because
# res[0] on an empty list raises IndexError
if res and not os.path.isfile(res[0]):
    print("Warning {} is not a regular file".format(res[0]))
# OTHER WAYS:
# using a regex
# re.escape() keeps any regex metacharacters in the prefix literal, and
# match() anchors at the start of the name so the barcode really is a prefix
bam_pattern = re.compile(re.escape(barcode) + r'.*\.bam$')
res = [f for f in os.listdir(run_bam_dir) if bam_pattern.match(f)]
for f in res:
    print(os.path.join(run_bam_dir, f))
# using a glob: collect every .bam in the dir, then keep the ones whose
# file name (not full path) starts with the prefix
res = [
    f for f in glob.glob(os.path.join(run_bam_dir, '*.bam'))
    if os.path.basename(f).startswith(barcode)
]
# plain directory listing; prints bare file names (no directory part)
for name in os.listdir(run_bam_dir):
    # print name
    if name.endswith(".bam") and name.startswith(barcode):
        print(name)
# find the bam files, searching run_bam_dir recursively
for subdir, dirs, files in os.walk(run_bam_dir):
    # print subdir
    # print dirs
    # print files
    for name in files:
        if name.endswith(".bam") and name.startswith(barcode):
            # join with the directory currently being walked to get a usable path
            print(os.path.join(subdir, name))
# verbose walk: dump everything os.walk() yields for each directory
print(run_bam_dir)
for path, subdirs, files in os.walk(run_bam_dir):
    print(path)
    print('\n')
    print(subdirs)
    print('\n')
    print(files)
    print('\n')
    for name in files:
        # os.path.abspath(name) alone resolves against the current working
        # directory, which is wrong unless we are walking cwd; join with the
        # walked directory first
        print(os.path.abspath(os.path.join(path, name)))
    print('\n')
    # iterate the subdirectories of the directory being walked
    for d in subdirs:
        print(d)
        for f in glob.iglob(os.path.join(path, d, '*.bam')):
            print(f)
# find the VCF files in the input dir (searched recursively), sorted by path
# NOTE(review): input_dir is not defined in the visible part of this file;
# it must be set before this snippet runs
raw_vcf_paths = []
for subdir, dirs, files in os.walk(input_dir):
    raw_vcf_paths.extend(
        os.path.join(subdir, name) for name in files if name.endswith('.vcf')
    )
raw_vcf_paths.sort()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment