Skip to content

Instantly share code, notes, and snippets.

@abitofhelp
Last active August 14, 2018 22:02
Show Gist options
  • Save abitofhelp/0794554c84df09bb76445cf0e8c0a83e to your computer and use it in GitHub Desktop.
Save abitofhelp/0794554c84df09bb76445cf0e8c0a83e to your computer and use it in GitHub Desktop.
This gist is a very high-performance file system scanner that supports international characters and paths longer than .NET's path-length limit.
/*
* Copyright (c) 2008-2015 Peter Palotas, Jeffrey Jangli, Alexandr Normuradov
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software (Alphaleonis) and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
* */
#region
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Threading.Tasks;
using NLog;
#endregion
namespace WbLib.FileSystem
{
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// This class implements a high-performance, recursive file scanner, which uses Win32 methods to
/// achieve maximum performance and to work with paths that are longer than the .NET limit of 260ish
/// characters.
/// </summary>
///
/// <remarks> Mgardner, 10/27/2016. </remarks>
///
/// <typeparam name="T"> Generic type parameter. </typeparam>
////////////////////////////////////////////////////////////////////////////////////////////////////
public sealed class FastFileScanner<T> : IDisposable
{
#region ENUMERATIONS
#endregion
#region FIELDS
/// <summary> NLog logger for this class; the scanning methods use it to record caught exceptions. </summary>
private static readonly Logger Logger = LogManager.GetCurrentClassLogger();
/// <summary>
/// True once <c>Dispose(bool)</c> has run with <c>isDisposing</c> set; checked there so the
/// clean-up path executes at most once.
/// </summary>
[DebuggerDisplay("_alreadyDisposed = {_alreadyDisposed}")]
private bool _alreadyDisposed = false;
#endregion
#region PROPERTIES
#endregion
#region DELEGATES
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Signature of the caller-supplied asynchronous callback that the scanner invokes once for
/// each file it finds.
/// </summary>
///
/// <remarks> Mgardner, 10/27/2016. </remarks>
///
/// <param name="fileInfo"> The file that was found. </param>
/// <param name="metadata"> The caller-defined value that was handed to the scan; passed through as-is. </param>
///
/// <returns>
/// A Task&lt;bool&gt;. The scanner awaits the task; the boolean result does not currently alter
/// the scan.
/// </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public delegate Task<bool> DoCallerFileAction(Alphaleonis.Win32.Filesystem.FileInfo fileInfo, T metadata);
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Signature of the caller-supplied synchronous callback that the scanner invokes for each
/// sub-folder before deciding whether to scan it.
/// </summary>
///
/// <remarks> Mgardner, 10/27/2016. </remarks>
///
/// <param name="dirInfo"> The sub-folder that was found. </param>
/// <param name="metadata"> The caller-defined value that was handed to the scan; passed through as-is. </param>
///
/// <returns> True to have the scanner descend into the folder; false to skip it. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public delegate bool DoCallerFolderAction(Alphaleonis.Win32.Filesystem.DirectoryInfo dirInfo, T metadata);
#endregion
#region CONSTRUCTORS/DESTRUCTORS
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Creates a scanner instance. The constructor performs no work; the scanning entry point
/// (<c>ScanDirectories</c>) is static and does not require an instance.
/// </summary>
///
/// <remarks> Mgardner, 10/27/2016. </remarks>
////////////////////////////////////////////////////////////////////////////////////////////////////
public FastFileScanner()
{
}
#endregion
#region METHODS
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Public entry point: runs the file action for every file in <paramref name="dirInfo"/> that
/// matches <paramref name="filter"/> and, when <paramref name="recursive"/> is true, offers
/// every sub-folder to the folder action and scans the ones it accepts.
/// </summary>
///
/// <remarks>
/// Mgardner, 10/27/2016.
/// The filter is applied to files only. Sub-folders are enumerated without a filter so that a
/// file pattern such as "*.txt" does not stop the scan from descending into folders whose
/// names do not match it.
/// Errors raised while enumerating the directory are logged and swallowed so the scan of
/// sibling folders can continue.
/// </remarks>
///
/// <exception cref="AggregateException">
/// Thrown after the scan of this directory when the file/folder helpers have queued one or
/// more exceptions.
/// </exception>
///
/// <param name="dirInfo"> The directory to scan. </param>
/// <param name="filter"> The search pattern for files. </param>
/// <param name="metadata"> Caller-defined value passed through to both callbacks. </param>
/// <param name="folderAction"> The user supplied action to perform for each folder. </param>
/// <param name="fileAction"> The user supplied action to perform for each file. </param>
/// <param name="recursive"> True to scan sub-folders as well. </param>
/// <param name="useParallelProcessing"> (Optional) True to use parallel processing. </param>
///
/// <returns> Always true when no exception is thrown. </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
public static bool ScanDirectories(
    Alphaleonis.Win32.Filesystem.DirectoryInfo dirInfo,
    string filter,
    T metadata,
    DoCallerFolderAction folderAction,
    DoCallerFileAction fileAction,
    bool recursive,
    bool useParallelProcessing = false)
{
    // ConcurrentQueue allows the helpers to enqueue safely from multiple threads.
    var exceptions = new ConcurrentQueue<Exception>();
    try
    {
        // Enumeration is lazy: the directory is actually read while the helper iterates.
        var files = dirInfo.EnumerateFiles(filter);
        DoFileAction(files, metadata, fileAction, exceptions, useParallelProcessing);
        if (recursive)
        {
            // Deliberately unfiltered: 'filter' describes files, not folder names.
            // No up-front Count() either; it would read the whole listing an extra time,
            // and iterating an empty sequence is already a no-op.
            var folders = dirInfo.EnumerateDirectories();
            DoFolderAction(folders, filter, metadata, folderAction, fileAction, recursive, exceptions,
                useParallelProcessing);
        }
    }
    catch (Exception ex)
    {
        // Catch file access errors from enumeration.
        // Log the error and continue with loop processing.
        Logger.Error(ex);
    }
    // Surface everything the callbacks reported, but only after the loops have completed.
    if (!exceptions.IsEmpty)
    {
        throw new AggregateException(exceptions);
    }
    // FIXME: Make this method return a real status.
    return true;
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Private recursion hook: forwards every argument unchanged to
/// <c>ScanDirectories</c> and discards its boolean result.
/// </summary>
///
/// <remarks> Mgardner, 10/27/2016. </remarks>
///
/// <param name="dirInfo"> The directory to scan. </param>
/// <param name="filter"> The filter for files. </param>
/// <param name="metadata"> Caller-defined value passed through to the callbacks. </param>
/// <param name="folderAction"> The user supplied action to perform for each folder. </param>
/// <param name="fileAction"> The user supplied action to perform for each file. </param>
/// <param name="recursive"> Recurse subfolders. </param>
/// <param name="useParallelProcessing"> True to use parallel processing. </param>
////////////////////////////////////////////////////////////////////////////////////////////////////
private static void ScanDirectory(
    Alphaleonis.Win32.Filesystem.DirectoryInfo dirInfo,
    string filter,
    T metadata,
    DoCallerFolderAction folderAction,
    DoCallerFileAction fileAction,
    bool recursive,
    bool useParallelProcessing)
{
    // Any AggregateException raised by the nested scan propagates to the caller.
    ScanDirectories(
        dirInfo: dirInfo,
        filter: filter,
        metadata: metadata,
        folderAction: folderAction,
        fileAction: fileAction,
        recursive: recursive,
        useParallelProcessing: useParallelProcessing);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Runs the file action for every file and does not return until all of those actions have
/// finished, either concurrently or one after another.
/// </summary>
///
/// <remarks>
/// Mgardner, 10/27/2016.
/// The method blocks on the asynchronous file actions because the scanner's public API is
/// synchronous. Without the wait, actions could still be running (and still queueing
/// exceptions) after the caller had already inspected <paramref name="exceptions"/>.
/// NOTE(review): blocking on async work can deadlock if the scan is started on a thread with a
/// single-threaded SynchronizationContext (e.g. a UI thread) and the callback resumes on that
/// context; start the scan from a thread-pool thread in that case.
/// </remarks>
///
/// <param name="files"> The files to process (enumerated lazily, exactly once). </param>
/// <param name="metadata"> Caller-defined value passed through to the file action. </param>
/// <param name="fileAction"> The action to invoke for each file. </param>
/// <param name="exceptions"> Queue that receives any exception thrown by a file action. </param>
/// <param name="useParallelProcessing">
/// True to start all file actions before waiting for them; false to complete each file
/// action before starting the next.
/// </param>
////////////////////////////////////////////////////////////////////////////////////////////////////
private static void DoFileAction(
    IEnumerable<Alphaleonis.Win32.Filesystem.FileInfo> files,
    T metadata,
    DoCallerFileAction fileAction,
    ConcurrentQueue<Exception> exceptions,
    bool useParallelProcessing)
{
    if (!useParallelProcessing)
    {
        foreach (var file in files)
        {
            // FileAction traps its own exceptions, so GetResult() does not throw for callback failures.
            FileAction(file, metadata, fileAction, exceptions).GetAwaiter().GetResult();
        }
        return;
    }

    var pending = new List<Task>();
    try
    {
        foreach (var file in files)
        {
            pending.Add(FileAction(file, metadata, fileAction, exceptions));
        }
    }
    finally
    {
        // Even if enumeration fails part-way, wait for the actions that were already started
        // so none of them outlives this call.
        Task.WhenAll(pending).GetAwaiter().GetResult();
    }
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Invokes the user supplied action for one file and reports its outcome. Any exception the
/// action throws is logged and queued instead of being propagated.
/// </summary>
///
/// <remarks> Mgardner, 10/27/2016. </remarks>
///
/// <param name="file"> The file to process. </param>
/// <param name="metadata"> Caller-defined value passed through to the file action. </param>
/// <param name="fileAction"> The action to invoke on the file; null means "nothing to do". </param>
/// <param name="exceptions"> Queue that receives any exception thrown by the action. </param>
///
/// <returns>
/// A Task&lt;bool&gt;: the value produced by <paramref name="fileAction"/>, true when no action
/// was supplied, or false when the action threw.
/// </returns>
////////////////////////////////////////////////////////////////////////////////////////////////////
private static async Task<bool> FileAction(
    Alphaleonis.Win32.Filesystem.FileInfo file,
    T metadata,
    DoCallerFileAction fileAction,
    ConcurrentQueue<Exception> exceptions)
{
    if (fileAction == null)
    {
        // No callback supplied: there is nothing that could fail.
        return true;
    }

    try
    {
        // The continuation only returns a value, so it has no need for the caller's context.
        return await fileAction(file, metadata).ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        Logger.Error(ex);
        // Store the exception and let the caller continue with the remaining files.
        exceptions.Enqueue(ex);
        return false;
    }
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Hands every folder in <paramref name="folders"/> to <c>FolderAction</c>, using
/// <c>Parallel.ForEach</c> when parallel processing is requested and a plain loop otherwise.
/// </summary>
///
/// <remarks> Mgardner, 10/27/2016. </remarks>
///
/// <param name="folders"> The folders to process. </param>
/// <param name="filter"> The filter for the files to process. </param>
/// <param name="metadata"> Caller-defined value passed through to the callbacks. </param>
/// <param name="folderAction"> The action to invoke for each folder. </param>
/// <param name="fileAction"> The action to invoke for each file. </param>
/// <param name="recursive"> Recurse subfolders. </param>
/// <param name="exceptions"> Queue that receives any exceptions. </param>
/// <param name="useParallelProcessing"> True, use parallel processing. </param>
////////////////////////////////////////////////////////////////////////////////////////////////////
private static void DoFolderAction(
    IEnumerable<Alphaleonis.Win32.Filesystem.DirectoryInfo> folders,
    string filter,
    T metadata,
    DoCallerFolderAction folderAction,
    DoCallerFileAction fileAction,
    bool recursive,
    ConcurrentQueue<Exception> exceptions,
    bool useParallelProcessing)
{
    // Both execution modes perform the same per-folder work; only the scheduling differs.
    Action<Alphaleonis.Win32.Filesystem.DirectoryInfo> visit = current =>
        FolderAction(current, filter, metadata, folderAction, fileAction, recursive, exceptions,
            useParallelProcessing);

    if (!useParallelProcessing)
    {
        foreach (var current in folders)
        {
            visit(current);
        }
        return;
    }

    var defaultOptions = new ParallelOptions();
    Parallel.ForEach<Alphaleonis.Win32.Filesystem.DirectoryInfo>(folders, defaultOptions, visit);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Offers one folder to the user supplied folder action and, unless that action returns
/// false, scans the folder. Any exception raised along the way is logged and queued.
/// </summary>
///
/// <remarks>
/// Mgardner, 10/27/2016.
/// A null <paramref name="folderAction"/> is treated as "no veto", mirroring how a null file
/// action is treated as a no-op; previously a null folder action silently disabled recursion.
/// </remarks>
///
/// <param name="folder"> The folder to process. </param>
/// <param name="filter"> The filter for files. </param>
/// <param name="metadata"> Caller-defined value passed through to the callbacks. </param>
/// <param name="folderAction">
/// The user supplied action to perform for each folder; returns false to skip the folder.
/// May be null.
/// </param>
/// <param name="fileAction"> The action to invoke on each file. </param>
/// <param name="recursive"> Recurse subfolders. </param>
/// <param name="exceptions"> Queue that receives any exceptions. </param>
/// <param name="useParallelProcessing"> True to use parallel processing. </param>
////////////////////////////////////////////////////////////////////////////////////////////////////
private static void FolderAction(
    Alphaleonis.Win32.Filesystem.DirectoryInfo folder,
    string filter,
    T metadata,
    DoCallerFolderAction folderAction,
    DoCallerFileAction fileAction,
    bool recursive,
    ConcurrentQueue<Exception> exceptions,
    bool useParallelProcessing)
{
    try
    {
        // Only an explicit 'false' from the callback keeps the scanner out of the folder.
        var shouldDescend = (folderAction == null) || folderAction(folder, metadata);
        if (shouldDescend)
        {
            ScanDirectory(folder, filter, metadata, folderAction, fileAction, recursive, useParallelProcessing);
        }
    }
    catch (Exception ex)
    {
        Logger.Error(ex);
        // Store the exception and let the caller continue with the remaining folders.
        exceptions.Enqueue(ex);
    }
}
#endregion
#region IDISPOSABLE IMPLEMENTATION
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// The IDisposable entry point. Runs the private <c>Dispose(bool)</c> clean-up routine and
/// then asks the garbage collector to skip finalization for this instance.
/// </summary>
///
/// <remarks> Mgardner, 10/27/2016. </remarks>
////////////////////////////////////////////////////////////////////////////////////////////////////
public void Dispose()
{
    // 'true' marks this as an explicit dispose requested by the caller.
    Dispose(isDisposing: true);
    GC.SuppressFinalize(obj: this);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// Performs the clean-up work at most once. The class currently owns no managed or unmanaged
/// resources, so the only effect is to record that disposal has happened.
/// </summary>
///
/// <remarks> The class is sealed, so this overload is private rather than virtual. </remarks>
///
/// <param name="isDisposing">
/// True when called for an explicit dispose; when false the method does nothing.
/// </param>
////////////////////////////////////////////////////////////////////////////////////////////////////
private void Dispose(bool isDisposing)
{
    // Don't dispose more than once, and only act on an explicit dispose.
    if (_alreadyDisposed || !isDisposing)
    {
        return;
    }

    // Managed and unmanaged resources would be released here if the class ever owns any.

    // Indicate that disposing has been completed.
    _alreadyDisposed = true;
}
#endregion
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment