Skip to content

Instantly share code, notes, and snippets.

@jeffhussmann
Created August 25, 2022 17:25
Show Gist options
  • Save jeffhussmann/b794d928ad1b46064033780cf2800a45 to your computer and use it in GitHub Desktop.
Save jeffhussmann/b794d928ad1b46064033780cf2800a45 to your computer and use it in GitHub Desktop.
Python script for surveying how often different directories produce the same ftok() value
import argparse
import os
from collections import defaultdict
def ftok(fn, proj_id):
    """Compute an ftok()-style IPC key for the file or directory at *fn*.

    The key is assembled from three bit fields:

    * bits 0-15:  low 16 bits of the path's inode number
    * bits 16-23: low 8 bits of the path's device number
    * bits 24-31: low 8 bits of *proj_id*

    Because only a slice of the inode and device numbers is kept, distinct
    paths can map to the same key.

    NOTE(review): this layout is intended to mirror the C library's ftok();
    confirm against the libc in use. The result is returned as a
    non-negative Python int, whereas a C ``key_t`` may be signed.

    Args:
        fn: Path to an existing file or directory.
        proj_id: Project identifier; only its low 8 bits are used.

    Returns:
        The combined key as an int.

    Raises:
        OSError: If ``os.stat(fn)`` fails (e.g. the path does not exist).
    """
    st = os.stat(fn)
    inode_bits = st.st_ino & 0xffff
    device_bits = (st.st_dev & 0xff) << 16
    proj_bits = (proj_id & 0xff) << 24
    return inode_bits | device_bits | proj_bits
# Command line: a single positional argument naming the directory tree to survey.
parser = argparse.ArgumentParser()
parser.add_argument('top_dir')
args = parser.parse_args()

# Maps each ftok value to every directory under top_dir that produced it.
ftok_value_to_fns = defaultdict(list)

proj_id = 23 # SHM_projectID from STAR Genome.cpp

# Only the directory paths themselves are keyed; the subdirectory and file
# listings that os.walk also yields are not needed.
for dirpath, _dirnames, _filenames in os.walk(args.top_dir):
    ftok_value = ftok(dirpath, proj_id)
    ftok_value_to_fns[ftok_value].append(dirpath)

# Report only the values shared by more than one directory (collisions).
for ftok_value, fns in ftok_value_to_fns.items():
    if len(fns) > 1:
        print(f'{ftok_value}:')
        for fn in fns:
            print(f'\t{fn}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment