Skip to content

Instantly share code, notes, and snippets.

@rcassani
Created May 21, 2019 21:05
Show Gist options
  • Save rcassani/e68653c7c2e76d669647d7eb39eab96c to your computer and use it in GitHub Desktop.
Save rcassani/e68653c7c2e76d669647d7eb39eab96c to your computer and use it in GitHub Desktop.
Find all hardlinked files in a specific path
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Find all hardlinked files in a specific path.
Hardlinked files share an inode; however, the same inode number can appear on
different partitions, so besides the inode it is also necessary to check the
number of the device where the file resides.
See -printf option %D
https://www.computerhope.com/unix/ufind.htm
"""
import os
import subprocess
import sys

import pandas as pd

if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO
search_path = '/mnt/d/home/Documents/'

# Column names of the table built from the output of `find`.
FIND_COLUMNS = ['device_inode', 'number_hardlinks', 'filepath']

# One record per file: "<device>-<inode> TAB <link count> TAB <path> NUL".
# The escapes are left for `find` itself to interpret (raw string). Records
# are NUL-terminated because NUL is the only byte that cannot occur in a path.
FIND_PRINTF_FORMAT = r'%D-%i\t%n\t%p\0'


def run_find(path):
    """Run `find` under *path* and return its raw report as text.

    Only regular files with 2 or more hard links (`-links +1`) are listed.
    The command is passed as an argument list, so no shell is involved and
    *path* may safely contain spaces or shell metacharacters.

    The exit status of `find` is deliberately ignored: it is non-zero when
    some directory is unreadable, yet the records already printed are still
    valid.
    """
    command = ['find', path, '-type', 'f', '-links', '+1',
               '-printf', FIND_PRINTF_FORMAT]
    completed = subprocess.run(command, stdout=subprocess.PIPE)
    # Undecodable bytes in file names are replaced rather than raising, so a
    # single oddly-encoded name cannot abort the whole report.
    return completed.stdout.decode(sys.getfilesystemencoding(),
                                   errors='replace')


def parse_find_output(output_string):
    """Turn the NUL-separated report of `run_find` into a DataFrame.

    Each record is split on the first two tabs only, so tabs, newlines and
    quote characters inside a file path are preserved verbatim. Empty or
    incomplete records are skipped. An empty report yields an empty frame
    with the expected columns instead of raising.
    """
    rows = []
    for record in output_string.split('\0'):
        if not record:
            continue
        fields = record.split('\t', 2)
        if len(fields) != 3:
            # Truncated record (e.g. `find` was interrupted): ignore it.
            continue
        device_inode, number_hardlinks, filepath = fields
        rows.append((device_inode, int(number_hardlinks), filepath))
    return pd.DataFrame(rows, columns=FIND_COLUMNS)


def hardlink_groups(frame):
    """Return one Series of file paths per inode seen more than once.

    A link count above 1 only says that other links exist somewhere; a group
    is reported only when at least two of those links were found inside the
    searched path, i.e. when the same device-inode key occurs on 2+ rows.
    """
    mentions = frame.device_inode.value_counts()
    shared_keys = mentions[mentions > 1].index.tolist()
    return [frame.filepath[frame.device_inode == key] for key in shared_keys]


def main():
    """Print every group of hardlinked files found under `search_path`."""
    frame = parse_find_output(run_find(search_path))
    for group in hardlink_groups(frame):
        print(group)
        print()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment