Created
May 21, 2019 21:05
-
-
Save rcassani/e68653c7c2e76d669647d7eb39eab96c to your computer and use it in GitHub Desktop.
Find all hardlinked files in a specific path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Find all hardlinked files in a specific path.
Hardlinked files share an inode; however, the same inode number can be found
on different partitions. As such, besides the inode, the device number where
the file exists must also be checked.
See the -printf option %D:
https://www.computerhope.com/unix/ufind.htm
"""
import os
import subprocess
import sys

import pandas as pd

if sys.version_info[0] < 3:
    from StringIO import StringIO
else:
    from io import StringIO
search_path = '/mnt/d/home/Documents/'

# Column layout of one `find -printf` record, in field order.
COLUMNS = ['device_inode', 'number_hardlinks', 'filepath']


def parse_find_output(raw):
    """Turn the raw bytes printed by ``find`` into a DataFrame.

    Each record is NUL-terminated and has three tab-separated fields:
    ``<device>-<inode>``, the hard-link count, and the file path. Records
    are split by hand instead of going through a CSV parser so that paths
    containing tabs, double quotes or newlines survive unchanged.

    Args:
        raw: bytes captured from ``find`` (may be empty).

    Returns:
        A DataFrame with the columns listed in ``COLUMNS``; it has zero
        rows when ``raw`` holds no records.
    """
    rows = []
    for record in os.fsdecode(raw).split('\0'):
        if not record:
            # Empty input, or the empty tail after the final terminator.
            continue
        # maxsplit=2: any further tab belongs to the file path itself.
        device_inode, link_count, filepath = record.split('\t', 2)
        rows.append((device_inode, int(link_count), filepath))
    return pd.DataFrame(rows, columns=COLUMNS)


def find_multilinked_files(path):
    """List the regular files under ``path`` that have 2 or more hard links.

    ``find`` is started from an argument list (no shell), so a ``path``
    containing spaces or shell metacharacters is handed over verbatim.

    Args:
        path: directory tree to search.

    Returns:
        The DataFrame built by ``parse_find_output``.
    """
    completed = subprocess.run(
        ['find', path, '-type', 'f', '-links', '+1',
         # Raw string: find itself expands \t and \0. %D is the device
         # number, %i the inode, %n the link count and %p the path.
         '-printf', r'%D-%i\t%n\t%p\0'],
        stdout=subprocess.PIPE,
    )
    # The exit status is deliberately not checked: whatever find managed
    # to print is still reported, as it was when the output came from
    # os.popen().
    return parse_find_output(completed.stdout)


def hardlink_groups(frame):
    """Group the listed paths that share the same device and inode.

    A device/inode pair is kept only when it occurs more than once in
    ``frame``, i.e. when at least two of the file's names were listed.

    Args:
        frame: DataFrame with ``device_inode`` and ``filepath`` columns.

    Returns:
        A list of ``filepath`` Series, one per shared device/inode pair,
        in ``value_counts`` order (most occurrences first).
    """
    # Count how many times each device_inode is mentioned.
    counts = frame['device_inode'].value_counts()
    shared_ids = counts[counts > 1].index.tolist()
    if not shared_ids:
        return []
    # Group once instead of re-scanning the whole frame for every inode.
    paths_by_id = frame.groupby('device_inode')['filepath']
    return [paths_by_id.get_group(shared_id) for shared_id in shared_ids]


def main(path=search_path):
    """Print every group of hard-linked paths found under ``path``."""
    for paths in hardlink_groups(find_multilinked_files(path)):
        print(paths)
        print()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment