Skip to content

Instantly share code, notes, and snippets.

@prysmakou
Created January 11, 2017 08:54
Show Gist options
  • Save prysmakou/a3d07b99c55d7e1a617741eb1db933eb to your computer and use it in GitHub Desktop.
Save prysmakou/a3d07b99c55d7e1a617741eb1db933eb to your computer and use it in GitHub Desktop.
Convert duplicate files to hard links
# Computes the SHA-1 hash of a file and attaches it to the input object.
#
# Parameters:
#   $file - any object exposing a FullName property that holds the path of the
#           file to hash (e.g. a FileInfo, or the output of
#           Select-Object FullName, Length).
#
# Returns:
#   The same $file object with a NoteProperty named 'hash' containing the
#   digest as an upper-case hexadecimal string. An existing 'hash' property is
#   overwritten.
#
# Errors:
#   If the file cannot be opened or read, a non-terminating error is written
#   and nothing is returned. This keeps unreadable files from all ending up
#   with the same empty hash value.
function Add-Hash ($file)
{
    $path = $file.FullName
    $hasher = [System.Security.Cryptography.SHA1]::Create()
    $stream = $null
    try
    {
        $stream = [System.IO.File]::OpenRead($path)
        $hashBytes = $hasher.ComputeHash($stream)
    }
    catch
    {
        Write-Error "Add-Hash: cannot hash '$path': $($_.Exception.Message)"
        return
    }
    finally
    {
        # Release the file handle and the hash provider on every path,
        # including when ComputeHash throws.
        if ($null -ne $stream) { $stream.Dispose() }
        $hasher.Dispose()
    }

    # BitConverter yields "AA-BB-CC"; dropping the dashes gives the same
    # upper-case two-digits-per-byte form as formatting each byte with "X2".
    $hash = [System.BitConverter]::ToString($hashBytes) -replace '-', ''

    # -Force so that hashing an object which already carries a 'hash' member
    # refreshes the value instead of raising an error.
    $file | Add-Member -MemberType NoteProperty -Name hash -Value $hash -Force
    return $file
}
<#
.SYNOPSIS
Replaces duplicate files with hard links to a single copy.
.DESCRIPTION
Recursively enumerates the files below the current location that match
FileMask and are larger than MinFileSize bytes, hashes each one with Add-Hash
and sorts the results by hash. Within every run of entries that have the same
hash and the same length, the first file is kept and each later file is
replaced by a hard link to it, created with "cmd /c mklink /h".

Every replacement is confirmed through ShouldProcess, so -WhatIf and -Confirm
apply per file. A duplicate is first renamed aside and is deleted only after
mklink reports success; if mklink fails, the file is moved back.

When processing ends, the total size in bytes of the replaced files is written
to the host as "Saved: <bytes>".
.PARAMETER FileMask
Wildcard handed to Get-ChildItem -Include, for example '*.pdf'.
.PARAMETER MinFileSize
Only files whose Length is strictly greater than this number of bytes are
considered. The default is 0, which skips empty files.
.EXAMPLE
ConvertTo-HardLink -FileMask '*.pdf'
.EXAMPLE
ConvertTo-HardLink -FileMask '*.iso' -MinFileSize 1048576 -WhatIf
.INPUTS
System.String. A file mask can be piped to this function.
.OUTPUTS
System.String. The text that mklink prints for each link it creates.
.NOTES
mklink /h can only link files that reside on the same volume.
Files that are already hard links of one another are not detected; they are
relinked and counted in the "Saved" total again.
#>
function ConvertTo-HardLink
{
    [CmdletBinding(DefaultParameterSetName='Parameter Set 1',
                   SupportsShouldProcess=$true,
                   ConfirmImpact='High')]
    Param
    (
        # File mask to process
        [Parameter(Mandatory=$true,
                   ValueFromPipeline=$true,
                   ValueFromPipelineByPropertyName=$true,
                   ValueFromRemainingArguments=$false,
                   Position=0,
                   ParameterSetName='Parameter Set 1')]
        [ValidateNotNullOrEmpty()]
        [string] $FileMask,

        # Min file size to process
        [Parameter(ParameterSetName='Parameter Set 1')]
        [int]
        $MinFileSize = 0
    )
    Begin
    {
        # Running total, in bytes, of the files replaced by links.
        $saved = 0
    }
    Process
    {
        $prev = $null   # first file of the current run of identical files
        $c = 1          # position of the current file within that run

        Get-ChildItem -File -Include $FileMask -Recurse |
            Select-Object FullName, Length |
            Where-Object { $_.Length -gt $MinFileSize } |
            ForEach-Object { Add-Hash $_ } |
            # Entries without a usable hash must never be compared with each
            # other, otherwise unreadable files would look like duplicates.
            Where-Object { $_.hash } |
            Sort-Object -Property hash, Length |
            ForEach-Object {
                $item = $_
                Write-Verbose ("{0} {1}" -f $item.FullName, $item.Length)

                $sameContent = ($null -ne $prev) -and
                               ($prev.hash -eq $item.hash) -and
                               ($prev.Length -eq $item.Length)

                if (-not $sameContent)
                {
                    # Start of a new run: this file becomes the link target.
                    $c = 1
                    $prev = $item
                }
                else
                {
                    if ($c -eq 1) { Write-Host $c $prev.FullName $prev.hash }
                    $c += 1
                    Write-Verbose ("{0} {1} {2}" -f $c, $item.FullName, $item.hash)

                    $pname = [string] $prev.FullName
                    $fname = [string] $item.FullName

                    if ($PSCmdlet.ShouldProcess($fname, "Replace with hard link to '$pname'"))
                    {
                        # Rename the duplicate aside instead of deleting it up
                        # front, so a failing mklink cannot lose the file.
                        $backup = '{0}.{1}.tmp' -f $fname, [guid]::NewGuid().ToString('N')
                        $movedAside = $false
                        try
                        {
                            [System.IO.File]::Move($fname, $backup)
                            $movedAside = $true
                        }
                        catch
                        {
                            Write-Error "Skipping '$fname': cannot move it aside: $($_.Exception.Message)"
                        }

                        if ($movedAside)
                        {
                            & cmd /c mklink /h $fname $pname

                            if ($LASTEXITCODE -eq 0)
                            {
                                # -Force also removes read-only/hidden files.
                                Remove-Item -LiteralPath $backup -Force
                                $saved += $item.Length
                            }
                            else
                            {
                                if (-not (Test-Path -LiteralPath $fname))
                                {
                                    [System.IO.File]::Move($backup, $fname)
                                }
                                Write-Error ("mklink failed for '$fname' (exit code $LASTEXITCODE); " +
                                             "the original data is preserved (restored, or left at '$backup').")
                            }
                        }
                    }
                }
            }
    }
    End
    {
        Write-Host "Saved: $saved"
    }
}
# Functions exported from this module: the conversion command and the
# hashing function that it calls.
Export-ModuleMember -Function ConvertTo-HardLink
Export-ModuleMember -Function Add-Hash
# Example invocation: (re)load the module from a fixed local path, then replace
# duplicate PDF files below the current location with hard links.
# NOTE(review): these two lines look like a separate caller script rather than
# part of the module file itself - confirm before shipping them together.
Import-Module -Name 'D:\Work - Copy\ps_examples\savespace\PRISM.psm1' -Force
ConvertTo-HardLink -FileMask '*.pdf'
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment