Last active
April 19, 2018 12:47
-
-
Save steghio/015dac3326d7cfe63223d16e60d2b515 to your computer and use it in GitHub Desktop.
File utilities
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File utilities |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.blogspot.groglogs.fileutils.structures; | |
import java.util.Objects; | |
public class FileEntry implements Comparable<FileEntry>{ | |
public long timestamp; | |
public String data; | |
public String filename; | |
public FileEntry(long timestamp, String data, String filename){ | |
this.timestamp = timestamp; | |
this.data = data; | |
this.filename = filename; | |
} | |
@Override | |
public boolean equals(Object other) { | |
if (other == this) return true; | |
if (!(other instanceof FileEntry)) { | |
return false; | |
} | |
FileEntry fe = (FileEntry) other; | |
return this.timestamp == fe.timestamp && this.data == fe.data && this.filename == fe.filename; | |
} | |
@Override | |
public int hashCode() { | |
return Objects.hash(timestamp, data, filename); | |
} | |
@Override | |
public int compareTo(FileEntry other) { | |
if(this.timestamp < other.timestamp) return -1; | |
if(this.timestamp == other.timestamp) return 0; | |
return 1; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.blogspot.groglogs.fileutils; | |
import com.blogspot.groglogs.fileutils.structures.FileEntry; | |
import java.io.*; | |
import java.util.*; | |
public class FileUtils { | |
//helper for mergeFiles | |
//Items are # separated, the timestamp is always the first item | |
//If item is malformed, have it raise the exception itself | |
private static Long getTimestamp(String line){ | |
StringBuilder sb = new StringBuilder(); | |
for(int i = 0; i < line.length(); i++){ | |
char c = line.charAt(i); | |
if(c == '#') break; | |
sb.append(c); | |
} | |
return Long.valueOf(sb.toString()); | |
} | |
//helper for mergeFiles | |
//reads a line from the input reader and stores it in the min heap | |
//returns what has been read so that the caller can handle removal from the map if needed | |
//If we encounter an error during read, have it propagate the exception | |
private static String processLine(BufferedReader br, PriorityQueue<FileEntry> sortedLines, String key) throws IOException{ | |
String line = br.readLine(); | |
//only process the line if there is data | |
if(line != null){ | |
Long timestamp = getTimestamp(line); | |
FileEntry ff = new FileEntry(timestamp, line, key); | |
sortedLines.add(ff); | |
} | |
//return the line so that the caller can handle the map removal | |
return line; | |
} | |
//helper for mergeSortedFiles | |
private static void mergeFiles(HashMap<String, BufferedReader> readers, BufferedWriter bw){ | |
PriorityQueue<FileEntry> sortedLines = new PriorityQueue<>(readers.size()); | |
//initialize min-heap with the first element from each file | |
//use an iterator, if any file is empty we can remove it from the map | |
for(Iterator<Map.Entry<String, BufferedReader>> it = readers.entrySet().iterator(); it.hasNext(); ) { | |
Map.Entry<String, BufferedReader> entry = it.next(); | |
BufferedReader br = entry.getValue(); | |
try { | |
String line = processLine(br, sortedLines, entry.getKey()); | |
//if null, we reached the end of file, remove the item from the map and close the reader | |
if (line == null){ | |
it.remove(); | |
br.close(); | |
} | |
}catch(IOException e){ | |
System.err.println("Error while reading line from file: " + entry.getKey() + " - " + e.getMessage()); | |
throw new RuntimeException("Aborting processing due to read errors. Check log for details"); | |
} | |
} | |
//keep looping over the heap until it's empty | |
//every time we read a line, get the next one from the same file and add it to the heap | |
while(!sortedLines.isEmpty()){ | |
FileEntry fe = sortedLines.poll(); | |
//write this entry to the out file | |
try { | |
bw.write(fe.data); | |
bw.newLine(); | |
}catch(IOException e){ | |
System.err.println("Error while writing line to output file: " + e.getMessage()); | |
throw new RuntimeException("Aborting processing due to write errors. Check log for details"); | |
} | |
//read, if any, the next line from the same file and put it in the min heap | |
BufferedReader br = readers.get(fe.filename); | |
if(br != null){ | |
try { | |
String line = processLine(br, sortedLines, fe.filename); | |
//if null, we reached the end of file, remove the item from the map and close the reader | |
if (line == null){ | |
readers.remove(fe.filename); | |
br.close(); | |
} | |
}catch(IOException e){ | |
System.err.println("Error while reading line from file: " + fe.filename + " - " + e.getMessage()); | |
throw new RuntimeException("Aborting processing due to read errors. Check log for details"); | |
} | |
} | |
//if the reader is null, remove it from the map | |
else{ | |
readers.remove(fe.filename); | |
} | |
} | |
//close out file | |
try { | |
bw.close(); | |
}catch(IOException e){ | |
throw new RuntimeException("Error while closing out file: " + e.getMessage()); | |
} | |
} | |
/* | |
Given N files where each line contains a timestamp (long) and some data sorted in ascending order | |
merge them in a single file also sorted in ascending order. | |
Files can be arbitrarily big and are passed in input as a full path list. | |
File content has the format: | |
timestamp#data | |
where timestamp is a long | |
*/ | |
public static void mergeSortedFiles(List<String> files, String outfile){ | |
if(files == null || files.size() < 2) throw new IllegalArgumentException("Input file list must contain at least two files"); | |
if(outfile == null || outfile.isEmpty()) throw new IllegalArgumentException("Must specify output file"); | |
//create output file | |
File f = new File(outfile); | |
BufferedWriter bw; | |
try{ | |
if(!f.createNewFile()) throw new IllegalArgumentException("Output file: " + outfile + " already exists"); | |
bw = new BufferedWriter(new FileWriter(f)); | |
}catch(IOException e){ | |
throw new RuntimeException("Error while creating out file: " + outfile + e.getMessage()); | |
} | |
//map from filename to its reader | |
HashMap<String, BufferedReader> readers = new HashMap<>(); | |
List<String> errorFiles = new ArrayList<>(); | |
//open each file in read mode and keep a reader for each one | |
for(String file : files) { | |
try{ | |
BufferedReader br = new BufferedReader(new FileReader(file)); | |
readers.put(file, br); | |
}catch (FileNotFoundException e) { | |
errorFiles.add("File: " + file + " not found: " + e.getMessage()); | |
} | |
} | |
//print out any error we encountered, but do not fail the operation | |
if(!errorFiles.isEmpty()){ | |
for(String s : errorFiles) System.err.println(s); | |
} | |
//abort execution only if no file or only one file could be successfully opened, delete output file too | |
if(readers.isEmpty()){ | |
f.delete(); | |
throw new RuntimeException("No input file could be opened. See error log for details"); | |
} | |
if(readers.size() < 2){ | |
f.delete(); | |
throw new RuntimeException("At least two files need to be successfully opened for merging. See error log for details"); | |
} | |
//merge the files | |
mergeFiles(readers, bw); | |
//close each remaining file, if any, and log errors but do not abort the operation | |
errorFiles = new ArrayList<>(); | |
for(Map.Entry entry : readers.entrySet()){ | |
try { | |
((BufferedReader)entry.getValue()).close(); | |
}catch (IOException e){ | |
errorFiles.add("Error while closing file reader: " + entry.getKey() + " - " + e.getMessage()); | |
} | |
} | |
//log any error encountered while closing the files | |
if(!errorFiles.isEmpty()){ | |
for(String s : errorFiles) System.err.println(s); | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.blogspot.groglogs.test.fileutils; | |
import org.junit.Test; | |
import java.util.ArrayList; | |
import java.util.List; | |
import static com.blogspot.groglogs.fileutils.FileUtils.mergeSortedFiles; | |
import static org.junit.Assert.assertEquals; | |
public class MergeFilesJTests { | |
List<String> files; | |
String outfile; | |
@Test | |
public void badInput() { | |
files = null; | |
try{ | |
mergeSortedFiles(files, outfile); | |
}catch(IllegalArgumentException e){ | |
System.out.println("null input file list got IllegalArgumentException: " + e.getMessage()); | |
} | |
files = new ArrayList<>(); | |
try{ | |
mergeSortedFiles(files, outfile); | |
}catch(IllegalArgumentException e){ | |
System.out.println("Empty input file list got IllegalArgumentException: " + e.getMessage()); | |
} | |
files = new ArrayList<>(); | |
files.add("/home/grog/Desktop/file1"); | |
try{ | |
mergeSortedFiles(files, outfile); | |
}catch(IllegalArgumentException e){ | |
System.out.println("single element input file list got IllegalArgumentException: " + e.getMessage()); | |
} | |
files = new ArrayList<>(); | |
files.add("/home/grog/Desktop/file1"); | |
files.add("/home/grog/Desktop/file2"); | |
outfile = null; | |
try{ | |
mergeSortedFiles(files, outfile); | |
}catch(IllegalArgumentException e){ | |
System.out.println("null out file got IllegalArgumentException: " + e.getMessage()); | |
} | |
files = new ArrayList<>(); | |
files.add("/home/grog/Desktop/file1"); | |
files.add("/home/grog/Desktop/file2"); | |
outfile = ""; | |
try{ | |
mergeSortedFiles(files, outfile); | |
}catch(IllegalArgumentException e){ | |
System.out.println("empty out file got IllegalArgumentException: " + e.getMessage()); | |
} | |
files = new ArrayList<>(); | |
files.add("/home/grog/Desktop/file1"); | |
files.add("/home/grog/Desktop/file2"); | |
outfile = "/home/grog/Desktop/file1"; | |
try{ | |
mergeSortedFiles(files, outfile); | |
}catch(IllegalArgumentException e){ | |
System.out.println("out file already exists got IllegalArgumentException: " + e.getMessage()); | |
} | |
} | |
@Test | |
public void merge3files() { | |
files = new ArrayList<>(); | |
files.add("/home/grog/Desktop/file1"); | |
files.add("/home/grog/Desktop/file2"); | |
files.add("/home/grog/Desktop/file3"); | |
outfile = "/home/grog/Desktop/outfile"; | |
mergeSortedFiles(files, outfile); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Full description at http://groglogs.blogspot.ch/2017/11/java-merge-sorted-files.html