Skip to content

Instantly share code, notes, and snippets.

@VenkataRaju
Last active August 29, 2015 14:15
Show Gist options
  • Save VenkataRaju/82ce3344d3687826dbb8 to your computer and use it in GitHub Desktop.
Save VenkataRaju/82ce3344d3687826dbb8 to your computer and use it in GitHub Desktop.
Top big files in a directory
package raju.fileutils;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Formatter;
import java.util.Iterator;
import java.util.List;
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
/**
 * Command-line tool that reports the N largest regular files found under one or
 * more folders, optionally restricted to file names accepted by a
 * {@link PathMatcher}.
 * <p>
 * The search runs on its own thread while a single-threaded scheduler
 * periodically prints the elapsed time and the number of files inspected so far;
 * once the search is done the scheduler prints the results and shuts itself down.
 */
public final class TopNBigFiles
{
    private static final int KB = 1024, MB = 1024 * KB, GB = 1024 * MB;
    private static final int MILLIS_PER_SEC = 1000, MILLIS_PER_MIN = 60000, MILLIS_PER_HOUR = 60 * MILLIS_PER_MIN;

    /**
     * Orders entries from the largest file to the smallest. Entries of equal
     * size are ordered by path, so two different files never compare as equal
     * (which would make a {@link TreeSet} drop one of them).
     */
    private static final Comparator<SizeAndPath> CMP = Comparator.comparingLong((SizeAndPath sap) -> sap.size)
            .reversed().thenComparing((SizeAndPath sap) -> sap.path);

    /**
     * Entry point. Expected arguments:
     * {@code -n NoOfFilesToBeFound [-p PathMatcher] FolderPath1 FolderPath2 ..}
     *
     * @param args command line arguments; the usage text is printed when fewer
     *        than two are given
     * @throws IOException if one of the folder paths cannot be resolved to a
     *         real path
     * @throws IllegalArgumentException if {@code -n} is missing, is not a
     *         positive integer, an option has no value, or no folder is given
     */
    public static void main(String[] args) throws IOException
    {
        if (args.length < 2)
        {
            System.out
                    .println("Usage: top-n-big-files-<version>.jar -n NoOfFilesToBeFound "
                            + "[-p PathMatcher] FolderPath1 FolderPath2 .."
                            + "\ne.g. top-n-big-files-<version>.jar -n 4 -p \"*.mp3\" D:\\Songs"
                            + "\nNote 1: Quotes are required around PathMatcher so that, command line won't interpret it."
                            + "\nNote 2: See here for PathMatcher syntax: https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-"
                            + "\n By default it is considered as glob");
            return;
        }

        // Mutable copy: getParamValue() removes every option it consumes.
        List<String> params = new ArrayList<>(Arrays.asList(args));

        int noOfFilesToBeFound = getParamValue(params, "-n").map(Integer::parseInt).orElseThrow(
                () -> new IllegalArgumentException("-n or its value is not found"));
        if (noOfFilesToBeFound < 1)
            throw new IllegalArgumentException("NoOfFilesToBeFound[" + noOfFilesToBeFound + "] should be greater than 0");

        // A pattern without an explicit "syntax:" prefix is treated as a glob.
        Optional<PathMatcher> pathMatcher = getParamValue(params, "-p")
                .map(pattern -> pattern.contains(":") ? pattern : "glob:" + pattern)
                .map(FileSystems.getDefault()::getPathMatcher);

        List<String> folderPathStrs = params; // Remaining arguments
        if (folderPathStrs.isEmpty())
            throw new IllegalArgumentException("No folder paths are provided");

        long startTime = System.currentTimeMillis();

        List<Path> folderPaths = new ArrayList<>(folderPathStrs.size());
        for (String folderPathStr : folderPathStrs)
            folderPaths.add(Paths.get(folderPathStr).toRealPath());

        Finder finder = new Finder(folderPaths, pathMatcher, noOfFilesToBeFound);
        FutureTask<Set<SizeAndPath>> futureTask = new FutureTask<>(finder::find);
        new Thread(futureTask).start();

        ScheduledExecutorService se = Executors.newSingleThreadScheduledExecutor();
        se.scheduleWithFixedDelay(() ->
        {
            // Read the completion state BEFORE printing, so that the last
            // progress line is printed after the search has finished and
            // therefore shows the final count.
            boolean done = futureTask.isDone();
            int elapsedTime = (int) (System.currentTimeMillis() - startTime);
            // '\r' makes every progress line overwrite the previous one.
            System.out.printf("Elapsed time: %s, Searched: %s files\r", getReadableTime(elapsedTime), formatNumber(finder.noOfFilesSearched));
            if (!done)
                return;

            se.shutdown();
            try
            {
                Set<SizeAndPath> results = futureTask.get();
                if (results.isEmpty())
                    System.out.print("\nNo files found");
                results.forEach(sap -> System.out.printf("%n%10s %s", getReadableSize(sap.size), sap.path));
            }
            catch (Exception e)
            {
                System.err.println("\nAn error occurred: " + e.getMessage() + "\n");
                e.printStackTrace();
            }
        }, 100, 400, TimeUnit.MILLISECONDS);
    }

    /**
     * If {@code name} is found in {@code params}, the value following it is
     * returned and both {@code name} and its value are removed from the
     * {@code params} list. Only the first occurrence of {@code name} is
     * considered.
     *
     * @param params mutable list of remaining command line arguments
     * @param name option name to look for, e.g. {@code "-n"}
     * @return the option value, or an empty {@code Optional} if {@code name}
     *         does not occur in {@code params}
     * @throws IllegalArgumentException if {@code name} is the last element of
     *         {@code params} and therefore has no value; without this check the
     *         dangling option would be left in the list and later be mistaken
     *         for a folder path
     */
    private static Optional<String> getParamValue(List<String> params, String name)
    {
        for (Iterator<String> it = params.iterator(); it.hasNext();)
        {
            if (!it.next().equals(name))
                continue;

            if (!it.hasNext())
                throw new IllegalArgumentException("Option " + name + " requires a value");

            it.remove(); // the option name
            String value = it.next();
            it.remove(); // the option value
            return Optional.of(value);
        }
        return Optional.empty();
    }

    /**
     * Single-use search for the largest files below a set of folders.
     * {@link #noOfFilesSearched} may be read from another thread to report
     * progress while {@link #find()} is running.
     */
    private static final class Finder
    {
        final Collection<Path> folderPaths;
        final Optional<PathMatcher> pathMatcher;
        final int noOfFilesToBeFound;

        boolean used;
        volatile int noOfFilesSearched;

        Finder(Collection<Path> folderPaths, Optional<PathMatcher> pathMatcher, int noOfFilesToBeFound)
        {
            this.folderPaths = Objects.requireNonNull(folderPaths);
            this.pathMatcher = Objects.requireNonNull(pathMatcher);
            this.noOfFilesToBeFound = noOfFilesToBeFound;
        }

        /**
         * Walks every folder and keeps the {@code noOfFilesToBeFound} entries
         * that rank first according to {@code CMP}.
         * <p>
         * Note: Can be called only once.
         *
         * @return the selected files, largest first
         * @throws IOException if a folder cannot be walked or a file size
         *         cannot be read
         * @throws IllegalStateException if this method was already called
         */
        Set<SizeAndPath> find() throws IOException
        {
            if (used)
                throw new IllegalStateException("Already used");
            used = true;

            NavigableSet<SizeAndPath> largest = new TreeSet<>(CMP);
            for (Path folderPath : folderPaths)
            {
                /* Symbolic links are not followed while walking (no FileVisitOption is passed) */
                try (Stream<Path> filesStream = Files.walk(folderPath))
                {
                    for (Iterator<Path> it = filesStream.filter(this::isCandidate).iterator(); it.hasNext();)
                    {
                        Path filePath = it.next();

                        // FindBugs ignore warning note:
                        // Not a bug as only one thread updates this and other thread need
                        // not read the most recent value. i.e. 'current - 1' is okay.
                        noOfFilesSearched++;

                        SizeAndPath candidate = new SizeAndPath(Files.size(filePath), filePath);

                        // Admission is decided with the same comparator that orders
                        // the set. Comparing sizes alone would skip a file that ties
                        // with the current smallest entry but sorts before it by
                        // path, making the result depend on the traversal order.
                        if (largest.size() < noOfFilesToBeFound || CMP.compare(candidate, largest.last()) < 0)
                        {
                            largest.add(candidate);
                            // No trimming happens when the candidate was already
                            // present (e.g. overlapping folders), as the size is
                            // unchanged in that case.
                            if (largest.size() > noOfFilesToBeFound)
                                largest.pollLast();
                        }
                    }
                }
            }
            return largest;
        }

        /** Accepts regular files whose file name satisfies the optional matcher. */
        private boolean isCandidate(Path path)
        {
            return Files.isRegularFile(path)
                    && (!pathMatcher.isPresent() || pathMatcher.get().matches(path.getFileName()));
        }
    }

    /** Immutable pair of a file size in bytes and the path of that file. */
    private static final class SizeAndPath
    {
        final long size;
        final Path path;

        SizeAndPath(long size, Path path)
        {
            this.size = size;
            this.path = path;
        }

        @Override
        public int hashCode()
        {
            return 31 * (31 + path.hashCode()) + (int) (size ^ (size >>> 32));
        }

        @Override
        public boolean equals(Object obj)
        {
            if (!(obj instanceof SizeAndPath))
                return false;
            SizeAndPath other = (SizeAndPath) obj;
            return (this == other) || (size == other.size && path.equals(other.path));
        }
    }

    /**
     * Formats a duration as {@code [H'h':][MM'm':]SS's'}. The hours part is
     * present only from one hour on, the minutes part only from one minute on.
     *
     * @param elapsedTime duration in milliseconds
     * @return e.g. {@code "07s"}, {@code "01m:01s"} or {@code "1h:01m:01s"}
     */
    private static String getReadableTime(int elapsedTime)
    {
        StringBuilder sb = new StringBuilder();
        boolean hoursShown = false;
        if (elapsedTime >= MILLIS_PER_HOUR)
        {
            sb.append(String.format("%dh:", elapsedTime / MILLIS_PER_HOUR));
            elapsedTime %= MILLIS_PER_HOUR;
            hoursShown = true;
        }
        // Once hours are shown, minutes are shown as well even when they are zero.
        if (hoursShown || elapsedTime >= MILLIS_PER_MIN)
        {
            sb.append(String.format("%02dm:", elapsedTime / MILLIS_PER_MIN));
            elapsedTime %= MILLIS_PER_MIN;
        }
        sb.append(String.format("%02ds", elapsedTime / MILLIS_PER_SEC));
        return sb.toString();
    }

    /**
     * Inserts grouping commas using the Indian numbering convention: the last
     * three digits form one group and the digits before them are grouped in
     * pairs, e.g. {@code 1234567} becomes {@code "12,34,567"}.
     *
     * @param num number to format; may be negative
     * @return the grouped decimal representation
     */
    private static String formatNumber(int num)
    {
        String digits = Integer.toString(num);
        StringBuilder sb = new StringBuilder(digits.length() + 4);
        if (num < 0)
        {
            sb.append('-');
            digits = digits.substring(1);
        }

        int len = digits.length();
        int headLen = len - 3; // digits in front of the last group of three; <= 0 for short numbers
        for (int i = 0; i < headLen; i++)
        {
            sb.append(digits.charAt(i));
            // A comma follows a head digit when an even number of head digits remains after it.
            if ((headLen - 1 - i) % 2 == 0)
                sb.append(',');
        }
        sb.append(digits.substring(Math.max(headLen, 0)));
        return sb.toString();
    }

    /**
     * Formats a byte count using the largest unit (GB, MB, KB, B; multiples of
     * 1024) that does not exceed it.
     *
     * @param size size in bytes
     * @return e.g. {@code " 1.50 KB"} or {@code "512.00 B"}
     */
    private static String getReadableSize(long size)
    {
        if (size >= GB)
            return String.format("%5.2f GB", (double) size / GB);
        if (size >= MB)
            return String.format("%5.2f MB", (double) size / MB);
        if (size >= KB)
            return String.format("%5.2f KB", (double) size / KB);
        return String.format("%d.00 B", size);
    }
}
@VenkataRaju
Copy link
Author

Executable Jar file can be found here: https://sites.google.com/site/rajuutils/fileutils
top-n-big-files-<version>.jar
Requires Java 1.8

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment