Last active
August 29, 2015 14:15
-
-
Save VenkataRaju/82ce3344d3687826dbb8 to your computer and use it in GitHub Desktop.
Top big files in a directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package raju.fileutils;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.Formatter;
import java.util.Iterator;
import java.util.List;
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;
/**
 * Command-line tool that lists the N biggest regular files found under one or
 * more folders, optionally restricted to file names accepted by a
 * {@link PathMatcher}.
 *
 * <p>The search runs on a worker thread while a scheduled task periodically
 * prints the elapsed time and the number of files examined so far; once the
 * search completes, the same task prints the results and shuts itself down.
 */
public final class TopNBigFiles
{
    private static final int KB = 1024, MB = 1024 * KB, GB = 1024 * MB;
    private static final int MILLIS_PER_SEC = 1000, MILLIS_PER_MIN = 60000, MILLIS_PER_HOUR = 60 * MILLIS_PER_MIN;

    /** Orders entries biggest first; entries of equal size are ordered by path. */
    private static final Comparator<SizeAndPath> CMP = Comparator.comparingLong((SizeAndPath sap) -> sap.size)
            .reversed().thenComparing((SizeAndPath sap) -> sap.path);

    /**
     * Entry point. Expects {@code -n <count>}, an optional {@code -p <pattern>}
     * and one or more folder paths; prints a usage message when fewer than two
     * arguments are given.
     *
     * @param args command-line arguments
     * @throws IOException if a folder path cannot be resolved to a real path
     * @throws IllegalArgumentException if {@code -n} is missing, not a number or
     *         less than 1, or if no folder path is given
     */
    public static void main(String[] args) throws IOException
    {
        if (args.length < 2)
        {
            System.out
                    .println("Usage: top-n-big-files-<version>.jar -n NoOfFilesToBeFound "
                            + "[-p PathMatcher] FolderPath1 FolderPath2 .."
                            + "\ne.g. top-n-big-files-<version>.jar -n 4 -p \"*.mp3\" D:\\Songs"
                            + "\nNote 1: Quotes are required around PathMatcher so that, command line won't interpret it."
                            + "\nNote 2: See here for PathMatcher syntax: https://docs.oracle.com/javase/8/docs/api/java/nio/file/FileSystem.html#getPathMatcher-java.lang.String-"
                            + "\n By default it is considered as glob");
            return;
        }

        // Mutable copy: getParamValue removes every option it consumes.
        List<String> params = new ArrayList<>(Arrays.asList(args));

        int noOfFilesToBeFound = getParamValue(params, "-n").map(Integer::parseInt).orElseThrow(
                () -> new IllegalArgumentException("-n or its value is not found"));
        if (noOfFilesToBeFound < 1)
        {
            throw new IllegalArgumentException("NoOfFilesToBeFound[" + noOfFilesToBeFound + "] should be greater than 0");
        }

        // A pattern without an explicit "syntax:" prefix is treated as a glob.
        Optional<PathMatcher> pathMatcher = getParamValue(params, "-p")
                .map(pattern -> pattern.contains(":") ? pattern : "glob:" + pattern)
                .map(FileSystems.getDefault()::getPathMatcher);

        List<String> folderPathStrs = params; // Remaining arguments
        if (folderPathStrs.isEmpty())
        {
            throw new IllegalArgumentException("No folder paths are provided");
        }

        long startTime = System.currentTimeMillis();

        List<Path> folderPaths = new ArrayList<>(folderPathStrs.size());
        for (String folderPathStr : folderPathStrs)
        {
            folderPaths.add(Paths.get(folderPathStr).toRealPath());
        }

        Finder finder = new Finder(folderPaths, pathMatcher, noOfFilesToBeFound);
        FutureTask<Set<SizeAndPath>> futureTask = new FutureTask<>(finder::find);
        new Thread(futureTask).start();

        ScheduledExecutorService se = Executors.newSingleThreadScheduledExecutor();
        se.scheduleWithFixedDelay(() ->
        {
            // Read the completion flag BEFORE printing, so that the last progress
            // line printed is guaranteed to show the final file count.
            boolean done = futureTask.isDone();

            // Clamp instead of blindly casting so a very long run cannot overflow.
            int elapsedTime = (int) Math.min(Integer.MAX_VALUE, System.currentTimeMillis() - startTime);
            System.out.printf("Elapsed time: %s, Searched: %s files\r", getReadableTime(elapsedTime),
                    formatNumber(finder.noOfFilesSearched));

            if (!done)
            {
                return;
            }

            se.shutdown();
            try
            {
                Set<SizeAndPath> results = futureTask.get();
                if (results.isEmpty())
                {
                    System.out.print("\nNo files found");
                }
                results.forEach(sap -> System.out.printf("%n%10s %s", getReadableSize(sap.size), sap.path));
            }
            catch (Exception e)
            {
                // Top-level boundary of the reporting task: report whatever went wrong.
                System.err.println("\nAn error occured: " + e.getMessage() + "\n");
                e.printStackTrace();
            }
        }, 100, 400, TimeUnit.MILLISECONDS);
    }

    /**
     * Looks up the value that follows the option {@code name} in {@code params}.
     * If the option is found and is followed by another element, both the
     * option and its value are removed from {@code params}.
     *
     * @param params mutable argument list; modified when the option is found
     * @param name option name to look for, e.g. {@code "-n"}
     * @return the element following {@code name}, or an empty {@code Optional}
     *         when {@code name} is absent or is the last element
     */
    private static Optional<String> getParamValue(List<String> params, String name)
    {
        int nameIndex = params.indexOf(name);
        if (nameIndex < 0 || nameIndex == params.size() - 1)
        {
            return Optional.empty();
        }

        // Remove the value first so that the index of the name stays valid.
        String value = params.remove(nameIndex + 1);
        params.remove(nameIndex);
        return Optional.of(value);
    }

    /**
     * Single-use search for the biggest files below a set of folders.
     * {@link #noOfFilesSearched} is updated by the searching thread only and
     * may be read from another thread to report progress.
     */
    private static final class Finder
    {
        final Collection<Path> folderPaths;
        final Optional<PathMatcher> pathMatcher;
        final int noOfFilesToBeFound;

        boolean used;
        volatile int noOfFilesSearched;

        Finder(Collection<Path> folderPaths, Optional<PathMatcher> pathMatcher, int noOfFilesToBeFound)
        {
            this.folderPaths = Objects.requireNonNull(folderPaths);
            this.pathMatcher = Objects.requireNonNull(pathMatcher);
            this.noOfFilesToBeFound = noOfFilesToBeFound;
        }

        /**
         * Walks every folder and keeps the {@code noOfFilesToBeFound} biggest
         * matching files, ordered as defined by {@link TopNBigFiles#CMP}.
         *
         * <p>Note: Can be called only once.
         *
         * @return the biggest files found, biggest first
         * @throws IOException if a folder cannot be traversed or a file size
         *         cannot be read
         * @throws IllegalStateException if called more than once
         */
        Set<SizeAndPath> find() throws IOException
        {
            if (used)
            {
                throw new IllegalStateException("Already used");
            }
            used = true;

            NavigableSet<SizeAndPath> sizeAndPaths = new TreeSet<>(CMP);
            long minFileSize = 0;

            for (Path folderPath : folderPaths)
            {
                /* Files.walk is used without FOLLOW_LINKS: symbolic links are not followed while walking */
                try (Stream<Path> filesStream = Files.walk(folderPath))
                {
                    Iterator<Path> matchingFiles = filesStream.filter(this::isWanted).iterator();
                    while (matchingFiles.hasNext())
                    {
                        Path filePath = matchingFiles.next();

                        // FindBugs ignore warning note:
                        // Not a bug as only one thread updates this and other thread need
                        // not read the most recent value. i.e. 'current - 1' is okay.
                        noOfFilesSearched++;

                        long fileSize = Files.size(filePath);

                        // '>=' (not '>') so that a file tying with the current cut-off
                        // still competes on path; otherwise the survivor among equal
                        // sized files would depend on the traversal order.
                        if (sizeAndPaths.size() < noOfFilesToBeFound || fileSize >= minFileSize)
                        {
                            sizeAndPaths.add(new SizeAndPath(fileSize, filePath));
                            if (sizeAndPaths.size() > noOfFilesToBeFound)
                            {
                                sizeAndPaths.pollLast();
                                minFileSize = sizeAndPaths.last().size;
                            }
                        }
                    }
                }
                catch (UncheckedIOException e)
                {
                    // Failures during the lazy traversal arrive wrapped; rethrow the
                    // underlying IOException so that the declared contract holds.
                    throw e.getCause();
                }
            }
            return sizeAndPaths;
        }

        /** Tells whether {@code path} is a regular file accepted by the optional matcher. */
        private boolean isWanted(Path path)
        {
            return Files.isRegularFile(path)
                    && (!pathMatcher.isPresent() || pathMatcher.get().matches(path.getFileName()));
        }
    }

    /** Immutable pair of a file size in bytes and the path of that file. */
    private static final class SizeAndPath
    {
        final long size;
        final Path path;

        SizeAndPath(long size, Path path)
        {
            this.size = size;
            this.path = path;
        }

        @Override
        public int hashCode()
        {
            return 31 * (31 + path.hashCode()) + Long.hashCode(size);
        }

        @Override
        public boolean equals(Object obj)
        {
            if (this == obj)
            {
                return true;
            }
            if (!(obj instanceof SizeAndPath))
            {
                return false;
            }
            SizeAndPath other = (SizeAndPath) obj;
            return size == other.size && path.equals(other.path);
        }
    }

    /**
     * Formats a duration as {@code [H"h:"][MM"m:"]SS"s"}. Hours appear only when
     * the duration is at least one hour; minutes appear when hours are shown or
     * when the duration is at least one minute.
     *
     * @param elapsedTime duration in milliseconds
     * @return e.g. {@code "05s"}, {@code "01m:05s"} or {@code "1h:02m:05s"}
     */
    private static String getReadableTime(int elapsedTime)
    {
        StringBuilder sb = new StringBuilder();
        int remaining = elapsedTime;

        boolean hoursShown = remaining >= MILLIS_PER_HOUR;
        if (hoursShown)
        {
            sb.append(String.format("%dh:", remaining / MILLIS_PER_HOUR));
            remaining %= MILLIS_PER_HOUR;
        }
        if (hoursShown || remaining >= MILLIS_PER_MIN)
        {
            sb.append(String.format("%02dm:", remaining / MILLIS_PER_MIN));
            remaining %= MILLIS_PER_MIN;
        }
        sb.append(String.format("%02ds", remaining / MILLIS_PER_SEC));
        return sb.toString();
    }

    /**
     * Inserts grouping commas into {@code num}: the last three digits form one
     * group and the digits before them are grouped in pairs (Indian numbering
     * convention), e.g. {@code 1234567} becomes {@code "12,34,567"}.
     *
     * @param num number to format; may be negative
     * @return the grouped decimal representation of {@code num}
     */
    private static String formatNumber(int num)
    {
        String digits = Integer.toString(num);
        StringBuilder sb = new StringBuilder(digits.length() + 3);
        if (num < 0)
        {
            sb.append('-');
            digits = digits.substring(1);
        }

        int len = digits.length();
        int headLen = Math.max(len - 3, 0); // digits in front of the last group of three
        for (int i = 0; i < headLen; i++)
        {
            sb.append(digits.charAt(i));
            // A comma closes a pair whenever an even number of head digits is left.
            if ((headLen - 1 - i) % 2 == 0)
            {
                sb.append(',');
            }
        }
        sb.append(digits, headLen, len);
        return sb.toString();
    }

    /**
     * Formats a byte count with two decimals using the biggest unit among B, KB,
     * MB and GB (powers of 1024) that does not exceed it. All branches use the
     * default locale's number format, so the decimal separator is consistent.
     *
     * @param size size in bytes
     * @return e.g. {@code "512.00 B"} or {@code " 1.50 KB"} in an English locale
     */
    private static String getReadableSize(long size)
    {
        if (size >= GB)
        {
            return String.format("%5.2f GB", (double) size / GB);
        }
        if (size >= MB)
        {
            return String.format("%5.2f MB", (double) size / MB);
        }
        if (size >= KB)
        {
            return String.format("%5.2f KB", (double) size / KB);
        }
        return String.format("%.2f B", (double) size);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Executable Jar file can be found here: https://sites.google.com/site/rajuutils/fileutils
top-n-big-files-<version>.jar
Requires Java 1.8